From 303a2d0863d7a86e96dd3c32655f1a044dc6bffe Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 13 Sep 2023 00:00:24 +0530 Subject: [PATCH 001/156] build(ingest): upgrade to sqlalchemy 1.4, drop 1.3 support (#8810) Co-authored-by: Harshal Sheth --- docs/how/updating-datahub.md | 1 + metadata-ingestion/build.gradle | 3 -- .../scripts/install-sqlalchemy-stubs.sh | 28 --------------- metadata-ingestion/setup.py | 25 ++++++------- .../source/datahub/datahub_database_reader.py | 6 +--- .../source/snowflake/snowflake_usage_v2.py | 9 +---- .../ingestion/source/sql/clickhouse.py | 35 +------------------ .../source/usage/clickhouse_usage.py | 6 +--- .../ingestion/source/usage/redshift_usage.py | 4 +-- .../source/usage/starburst_trino_usage.py | 6 +--- 10 files changed, 17 insertions(+), 106 deletions(-) delete mode 100755 metadata-ingestion/scripts/install-sqlalchemy-stubs.sh diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 1ef7413a88ebd..9b19291ee246a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -5,6 +5,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next ### Breaking Changes +- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. ### Potential Downtime diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 199ccc59c21e0..408ea771bc93f 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -71,7 +71,6 @@ task installDev(type: Exec, dependsOn: [install]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "touch ${sentinel_file}" } @@ -82,7 +81,6 @@ task installAll(type: Exec, dependsOn: [install]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "${venv_name}/bin/pip install -e .[all] ${extra_pip_requirements} && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "touch ${sentinel_file}" } @@ -119,7 +117,6 @@ task lint(type: Exec, dependsOn: installDev) { task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "./scripts/install-sqlalchemy-stubs.sh && " + "black src/ tests/ examples/ && " + "isort src/ tests/ examples/ && " + "flake8 src/ tests/ examples/ && " + diff --git a/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh b/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh deleted file mode 100755 index 7c14a06464f99..0000000000000 --- a/metadata-ingestion/scripts/install-sqlalchemy-stubs.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# ASSUMPTION: This assumes that we're running from inside the venv. - -SQLALCHEMY_VERSION=$(python -c 'import sqlalchemy; print(sqlalchemy.__version__)') - -if [[ $SQLALCHEMY_VERSION == 1.3.* ]]; then - ENSURE_NOT_INSTALLED=sqlalchemy2-stubs - ENSURE_INSTALLED=sqlalchemy-stubs -elif [[ $SQLALCHEMY_VERSION == 1.4.* ]]; then - ENSURE_NOT_INSTALLED=sqlalchemy-stubs - ENSURE_INSTALLED=sqlalchemy2-stubs -else - echo "Unsupported SQLAlchemy version: $SQLALCHEMY_VERSION" - exit 1 -fi - -FORCE_REINSTALL="" -if pip show $ENSURE_NOT_INSTALLED >/dev/null 2>&1 ; then - pip uninstall --yes $ENSURE_NOT_INSTALLED - FORCE_REINSTALL="--force-reinstall" -fi - -if [ -n "$FORCE_REINSTALL" ] || ! 
pip show $ENSURE_INSTALLED >/dev/null 2>&1 ; then - pip install $FORCE_REINSTALL $ENSURE_INSTALLED -fi diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index d8668e8925546..09f71fa769fd3 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -112,7 +112,8 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy>=1.3.24, <2", + # This is temporary lower bound that we're open to loosening/tightening as requirements show up + "sqlalchemy>=1.4.39, <2", # Required for SQL profiling. "great-expectations>=0.15.12, <=0.15.50", # scipy version restricted to reduce backtracking, used by great-expectations, @@ -172,13 +173,13 @@ def get_long_description(): } clickhouse_common = { - # Clickhouse 0.1.8 requires SQLAlchemy 1.3.x, while the newer versions - # allow SQLAlchemy 1.4.x. - "clickhouse-sqlalchemy>=0.1.8", + # Clickhouse 0.2.0 adds support for SQLAlchemy 1.4.x + "clickhouse-sqlalchemy>=0.2.0", } redshift_common = { - "sqlalchemy-redshift", + # Clickhouse 0.8.3 adds support for SQLAlchemy 1.4.x + "sqlalchemy-redshift>=0.8.3", "psycopg2-binary", "GeoAlchemy2", *sqllineage_lib, @@ -188,13 +189,8 @@ def get_long_description(): snowflake_common = { # Snowflake plugin utilizes sql common *sql_common, - # Required for all Snowflake sources. - # See https://github.com/snowflakedb/snowflake-sqlalchemy/issues/234 for why 1.2.5 is blocked. - "snowflake-sqlalchemy>=1.2.4, !=1.2.5", - # Because of https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 we need to restrict SQLAlchemy's max version. - # Eventually we should just require snowflake-sqlalchemy>=1.4.3, but I won't do that immediately - # because it may break Airflow users that need SQLAlchemy 1.3.x. - "SQLAlchemy<1.4.42", + # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 + "snowflake-sqlalchemy>=1.4.3", # See https://github.com/snowflakedb/snowflake-connector-python/pull/1348 for why 2.8.2 is blocked "snowflake-connector-python!=2.8.2", "pandas", @@ -206,9 +202,7 @@ def get_long_description(): } trino = { - # Trino 0.317 broke compatibility with SQLAlchemy 1.3.24. - # See https://github.com/trinodb/trino-python-client/issues/250. 
- "trino[sqlalchemy]>=0.308, !=0.317", + "trino[sqlalchemy]>=0.308", } pyhive_common = { @@ -430,6 +424,7 @@ def get_long_description(): "types-Deprecated", "types-protobuf>=4.21.0.1", "types-tzlocal", + "sqlalchemy2-stubs", } diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index a5aadbd6e246b..96184d8d445e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -69,11 +69,7 @@ def get_aspects( return for i, row in enumerate(rows): - # TODO: Replace with namedtuple usage once we drop sqlalchemy 1.3 - if hasattr(row, "_asdict"): - row_dict = row._asdict() - else: - row_dict = dict(row) + row_dict = row._asdict() mcp = self._parse_row(row_dict) if mcp: yield mcp, row_dict["createdon"] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index d041d219c4bdd..1cbd4a3b3ea24 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -451,17 +451,10 @@ def _get_operation_aspect_work_unit( yield wu def _process_snowflake_history_row( - self, row: Any + self, event_dict: dict ) -> Iterable[SnowflakeJoinedAccessEvent]: try: # big hammer try block to ensure we don't fail on parsing events self.report.rows_processed += 1 - # Make some minor type conversions. - if hasattr(row, "_asdict"): - # Compat with SQLAlchemy 1.3 and 1.4 - # See https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#rowproxy-is-no-longer-a-proxy-is-now-called-row-and-behaves-like-an-enhanced-named-tuple. - event_dict = row._asdict() - else: - event_dict = dict(row) # no use processing events that don't have a query text if not event_dict["QUERY_TEXT"]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 20130ef21e5e6..1626f86b92545 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -38,7 +38,6 @@ logger, register_custom_type, ) -from datahub.ingestion.source.sql.sql_config import make_sqlalchemy_uri from datahub.ingestion.source.sql.two_tier_sql_source import ( TwoTierSQLAlchemyConfig, TwoTierSQLAlchemySource, @@ -147,7 +146,6 @@ class ClickHouseConfig( include_materialized_views: Optional[bool] = Field(default=True, description="") def get_sql_alchemy_url(self, current_db=None): - url = make_url( super().get_sql_alchemy_url(uri_opts=self.uri_opts, current_db=current_db) ) @@ -158,42 +156,11 @@ def get_sql_alchemy_url(self, current_db=None): ) # We can setup clickhouse ingestion in sqlalchemy_uri form and config form. - - # If we use sqlalchemu_uri form then super().get_sql_alchemy_url doesn't - # update current_db because it return self.sqlalchemy_uri without any update. - # This code bellow needed for rewriting sqlalchemi_uri and replace database with current_db.from - # For the future without python3.7 and sqlalchemy 1.3 support we can use code - # url=url.set(db=current_db), but not now. - # Why we need to update database in uri at all? 
# Because we get database from sqlalchemy inspector and inspector we form from url inherited from # TwoTierSQLAlchemySource and SQLAlchemySource - if self.sqlalchemy_uri and current_db: - self.scheme = url.drivername - self.username = url.username - self.password = ( - pydantic.SecretStr(str(url.password)) - if url.password - else pydantic.SecretStr("") - ) - if url.host and url.port: - self.host_port = url.host + ":" + str(url.port) - elif url.host: - self.host_port = url.host - # untill released https://github.com/python/mypy/pull/15174 - self.uri_opts = {str(k): str(v) for (k, v) in url.query.items()} - - url = make_url( - make_sqlalchemy_uri( - self.scheme, - self.username, - self.password.get_secret_value() if self.password else None, - self.host_port, - current_db if current_db else self.database, - uri_opts=self.uri_opts, - ) - ) + url = url.set(database=current_db) return str(url) diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py index 855958f0755e1..f659ea0c1c5c0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/clickhouse_usage.py @@ -143,11 +143,7 @@ def _get_clickhouse_history(self): results = engine.execute(query) events = [] for row in results: - # minor type conversion - if hasattr(row, "_asdict"): - event_dict = row._asdict() - else: - event_dict = dict(row) + event_dict = row._asdict() # stripping extra spaces caused by above _asdict() conversion for k, v in event_dict.items(): diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py index 99a980b326e53..691eaa8211054 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/redshift_usage.py @@ -298,9 +298,7 @@ def _gen_access_events_from_history_query( for row in results: if not self._should_process_row(row): continue - if hasattr(row, "_asdict"): - # Compatibility with sqlalchemy 1.4.x. - row = row._asdict() + row = row._asdict() access_event = RedshiftAccessEvent(**dict(row.items())) # Replace database name with the alias name if one is provided in the config. 
if self.config.database_alias: diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 9394a8bba5e0b..c38800b3a6983 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -162,11 +162,7 @@ def _get_trino_history(self): results = engine.execute(query) events = [] for row in results: - # minor type conversion - if hasattr(row, "_asdict"): - event_dict = row._asdict() - else: - event_dict = dict(row) + event_dict = row._asdict() # stripping extra spaces caused by above _asdict() conversion for k, v in event_dict.items(): From f7fee743bfddf27f072e5c56512ef905d942eab6 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 12 Sep 2023 13:11:01 -0700 Subject: [PATCH 002/156] fix(ingest): use epoch 1 for dev build versions (#8824) --- docker/datahub-ingestion-base/smoke.Dockerfile | 2 +- docker/datahub-ingestion/Dockerfile | 4 ++-- docker/datahub-ingestion/Dockerfile-slim-only | 2 +- metadata-ingestion-modules/airflow-plugin/scripts/release.sh | 2 +- .../airflow-plugin/src/datahub_airflow_plugin/__init__.py | 2 +- metadata-ingestion/scripts/release.sh | 2 +- metadata-ingestion/src/datahub/__init__.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/datahub-ingestion-base/smoke.Dockerfile b/docker/datahub-ingestion-base/smoke.Dockerfile index 276f6dbc4436e..15dc46ae5b882 100644 --- a/docker/datahub-ingestion-base/smoke.Dockerfile +++ b/docker/datahub-ingestion-base/smoke.Dockerfile @@ -20,7 +20,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk COPY . /datahub-src ARG RELEASE_VERSION RUN cd /datahub-src/metadata-ingestion && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ cd ../ && \ ./gradlew :metadata-ingestion:installAll diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 2ceff6a800ebb..8b726df5e8842 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -11,8 +11,8 @@ COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plug ARG RELEASE_VERSION WORKDIR /datahub-ingestion -RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ +RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ cat src/datahub/__init__.py && \ chown -R datahub /datahub-ingestion diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 678bee7e306f6..9ae116f839aa0 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -9,7 +9,7 @@ COPY ./metadata-ingestion /datahub-ingestion ARG RELEASE_VERSION WORKDIR /datahub-ingestion -RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" 
src/datahub/__init__.py && \ +RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ chown -R datahub /datahub-ingestion diff --git a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh index 7134187a45885..87157479f37d6 100755 --- a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh @@ -13,7 +13,7 @@ MODULE=datahub_airflow_plugin python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' if [[ ${RELEASE_VERSION:-} ]]; then # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py else vim src/${MODULE}/__init__.py fi diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index ce98a0fc1fb60..b2c45d3a1e75d 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "acryl-datahub-airflow-plugin" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: diff --git a/metadata-ingestion/scripts/release.sh b/metadata-ingestion/scripts/release.sh index 0a09c4e0307b3..eacaf1d920a8d 100755 --- a/metadata-ingestion/scripts/release.sh +++ b/metadata-ingestion/scripts/release.sh @@ -11,7 +11,7 @@ fi python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' if [[ ${RELEASE_VERSION:-} ]]; then # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py + sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py else vim src/datahub/__init__.py fi diff --git a/metadata-ingestion/src/datahub/__init__.py b/metadata-ingestion/src/datahub/__init__.py index 3ac3efefc14f0..a470de7b500be 100644 --- a/metadata-ingestion/src/datahub/__init__.py +++ b/metadata-ingestion/src/datahub/__init__.py @@ -3,7 +3,7 @@ # Published at https://pypi.org/project/acryl-datahub/. 
__package_name__ = "acryl-datahub" -__version__ = "0.0.0.dev0" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: From 449cc9ba91bfc51bc8e5a66de7920340f164f272 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 12 Sep 2023 13:15:05 -0700 Subject: [PATCH 003/156] ci: make wheel builds more robust (#8815) --- docs-website/sphinx/Makefile | 5 ++++- docs-website/sphinx/requirements.txt | 2 +- docs-website/yarn.lock | 18 +++++++++++------- .../airflow-plugin/build.gradle | 6 +++--- metadata-ingestion/build.gradle | 6 +++--- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/docs-website/sphinx/Makefile b/docs-website/sphinx/Makefile index 00ece7ae25331..c01b45e322c67 100644 --- a/docs-website/sphinx/Makefile +++ b/docs-website/sphinx/Makefile @@ -22,7 +22,7 @@ $(VENV_SENTINEL): requirements.txt $(VENV_DIR)/bin/pip install -r requirements.txt touch $(VENV_SENTINEL) -.PHONY: help html doctest linkcheck clean serve md +.PHONY: help html doctest linkcheck clean clean_all serve md # Not using Python's http.server because it enables caching headers. serve: @@ -35,3 +35,6 @@ md: html # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). html doctest linkcheck clean: venv Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +clean_all: clean + -rm -rf $(VENV_DIR) diff --git a/docs-website/sphinx/requirements.txt b/docs-website/sphinx/requirements.txt index a63fd05853259..94ddd40579f0e 100644 --- a/docs-website/sphinx/requirements.txt +++ b/docs-website/sphinx/requirements.txt @@ -1,4 +1,4 @@ --e ../../metadata-ingestion[datahub-rest] +-e ../../metadata-ingestion[datahub-rest,sql-parsing] beautifulsoup4==4.11.2 Sphinx==6.1.3 sphinx-click==4.4.0 diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 209a57a43dab0..0613fe71ef78e 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -2986,6 +2986,13 @@ dependencies: "@types/node" "*" +"@types/websocket@^1.0.3": + version "1.0.6" + resolved "https://registry.yarnpkg.com/@types/websocket/-/websocket-1.0.6.tgz#ec8dce5915741632ac3a4b1f951b6d4156e32d03" + integrity sha512-JXkliwz93B2cMWOI1ukElQBPN88vMg3CruvW4KVSKpflt3NyNCJImnhIuB/f97rG7kakqRJGFiwkA895Kn02Dg== + dependencies: + "@types/node" "*" + "@types/ws@^8.5.5": version "8.5.5" resolved "https://registry.yarnpkg.com/@types/ws/-/ws-8.5.5.tgz#af587964aa06682702ee6dcbc7be41a80e4b28eb" @@ -7053,7 +7060,6 @@ node-forge@^1: resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3" integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA== - node-gyp-build@^4.3.0: version "4.6.1" resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.1.tgz#24b6d075e5e391b8d5539d98c7fc5c210cac8a3e" @@ -9903,6 +9909,10 @@ use-sidecar@^1.1.2: detect-node-es "^1.1.0" tslib "^2.0.0" +use-sync-external-store@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" + integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== utf-8-validate@^5.0.2: version "5.0.10" @@ -9911,12 +9921,6 @@ utf-8-validate@^5.0.2: dependencies: node-gyp-build "^4.3.0" -use-sync-external-store@^1.2.0: - version "1.2.0" - resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" - integrity 
sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== - - util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index d1e6f2f646491..58a2bc9e670e3 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -110,14 +110,14 @@ task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" } -task buildWheel(type: Exec, dependsOn: [install]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' -} task cleanPythonCache(type: Exec) { commandLine 'bash', '-c', "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } +task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} build.dependsOn install check.dependsOn lint diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 408ea771bc93f..c20d98cbcbb58 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -185,9 +185,6 @@ task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { task docGen(type: Exec, dependsOn: [codegen, installDevTest, specGen]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/docgen.sh" } -task buildWheel(type: Exec, dependsOn: [install, codegen]) { - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' -} @@ -195,6 +192,9 @@ task cleanPythonCache(type: Exec) { commandLine 'bash', '-c', "find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" } +task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) { + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' +} build.dependsOn install check.dependsOn lint From 138f6c0f74a4799d31560e9fde19ef6011089990 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Tue, 12 Sep 2023 22:26:30 +0100 Subject: [PATCH 004/156] feat(cli): fix upload ingest cli endpoint (#8826) --- metadata-ingestion/src/datahub/cli/ingest_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 42c0ea1601c74..5931bf89b010b 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -282,12 +282,14 @@ def deploy( "urn": urn, "name": name, "type": pipeline_config["source"]["type"], - "schedule": 
{"interval": schedule, "timezone": time_zone}, "recipe": json.dumps(pipeline_config), "executorId": executor_id, "version": cli_version, } + if schedule is not None: + variables["schedule"] = {"interval": schedule, "timezone": time_zone} + if urn: if not datahub_graph.exists(urn): logger.error(f"Could not find recipe for provided urn: {urn}") @@ -331,6 +333,7 @@ def deploy( $version: String) { createIngestionSource(input: { + name: $name, type: $type, schedule: $schedule, config: { From 3cc0f76d178f239acc018e06ec408eb6b38bfb5d Mon Sep 17 00:00:00 2001 From: Adriano Vega Llobell Date: Tue, 12 Sep 2023 23:34:24 +0200 Subject: [PATCH 005/156] docs(transformer): fix names in sample code of 'pattern_add_dataset_domain' (#8755) --- metadata-ingestion/docs/transformer/dataset_transformer.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index cb06656940918..f0fa44687a109 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -909,7 +909,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however replace existing domains sent by ingestion source ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: "pattern_add_dataset_domain" config: replace_existing: true # false is default behaviour domain_pattern: @@ -920,7 +920,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however overwrite the domains available for the dataset on DataHub GMS ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: "pattern_add_dataset_domain" config: semantics: OVERWRITE # OVERWRITE is default behaviour domain_pattern: @@ -931,7 +931,7 @@ in both of the cases domain should be provisioned on DataHub GMS - Add domains, however keep the domains available for the dataset on DataHub GMS ```yaml transformers: - - type: "pattern_add_dataset_ownership" + - type: "pattern_add_dataset_domain" config: semantics: PATCH domain_pattern: From 785ab7718df8e4e46bdd612ed3deaafbda1d42cc Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Wed, 13 Sep 2023 03:45:58 -0400 Subject: [PATCH 006/156] fix(siblingsHook): check number of dbtUpstreams instead of all upStreams (#8817) Co-authored-by: Ethan Cartwright --- .../hook/siblings/SiblingAssociationHook.java | 19 ++- .../siblings/SiblingAssociationHookTest.java | 112 ++++++++++++++---- 2 files changed, 100 insertions(+), 31 deletions(-) diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index 2be719ed263ea..06545ef3525dd 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -200,10 +200,19 @@ private void handleSourceDatasetEvent(MetadataChangeLog event, DatasetUrn source UpstreamLineage upstreamLineage = getUpstreamLineageFromEvent(event); if (upstreamLineage != null && upstreamLineage.hasUpstreams()) { UpstreamArray upstreams = upstreamLineage.getUpstreams(); - if ( - upstreams.size() == 1 - && 
upstreams.get(0).getDataset().getPlatformEntity().getPlatformNameEntity().equals(DBT_PLATFORM_NAME)) { - setSiblingsAndSoftDeleteSibling(upstreams.get(0).getDataset(), sourceUrn); + + // an entity can have merged lineage (eg. dbt + snowflake), but by default siblings are only between dbt <> non-dbt + UpstreamArray dbtUpstreams = new UpstreamArray( + upstreams.stream() + .filter(obj -> obj.getDataset().getPlatformEntity().getPlatformNameEntity().equals(DBT_PLATFORM_NAME)) + .collect(Collectors.toList()) + ); + // We're assuming a data asset (eg. snowflake table) will only ever be downstream of 1 dbt model + if (dbtUpstreams.size() == 1) { + setSiblingsAndSoftDeleteSibling(dbtUpstreams.get(0).getDataset(), sourceUrn); + } else { + log.error("{} has an unexpected number of dbt upstreams: {}. Not adding any as siblings.", sourceUrn.toString(), dbtUpstreams.size()); + } } } @@ -219,7 +228,7 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { existingDbtSiblingAspect != null && existingSourceSiblingAspect != null && existingDbtSiblingAspect.getSiblings().contains(sourceUrn.toString()) - && existingDbtSiblingAspect.getSiblings().contains(dbtUrn.toString()) + && existingSourceSiblingAspect.getSiblings().contains(dbtUrn.toString()) ) { // we have already connected them- we can abort here return; diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 5fb2cfaaef2d1..78d304d67bfc0 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -36,6 +36,8 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.net.URISyntaxException; + import static com.linkedin.metadata.Constants.*; import static org.mockito.ArgumentMatchers.*; @@ -78,15 +80,12 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { _mockAuthentication )).thenReturn(mockResponse); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -151,15 +150,11 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { _mockAuthentication )).thenReturn(mockResponse); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, 
ChangeType.UPSERT); + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -189,15 +184,11 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Exception { Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); final UpstreamLineage upstreamLineage = new UpstreamLineage(); final UpstreamArray upstreamArray = new UpstreamArray(); - final Upstream upstream = new Upstream(); - upstream.setType(DatasetLineageType.TRANSFORMED); - upstream.setDataset(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)")); + Upstream upstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); upstreamArray.add(upstream); upstreamLineage.setUpstreams(upstreamArray); @@ -259,10 +250,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { .setSkipAggregates(true).setSkipHighlighting(true)) )).thenReturn(returnSearchResult); - MetadataChangeLog event = new MetadataChangeLog(); - event.setEntityType(DATASET_ENTITY_NAME); - event.setAspectName(DATASET_KEY_ASPECT_NAME); - event.setChangeType(ChangeType.UPSERT); + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, DATASET_KEY_ASPECT_NAME, ChangeType.UPSERT); final DatasetKey datasetKey = new DatasetKey(); datasetKey.setName("my-proj.jaffle_shop.customers"); datasetKey.setOrigin(FabricType.PROD); @@ -304,4 +292,76 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { Mockito.eq(_mockAuthentication) ); } -} + @Test + public void testInvokeWhenSourceUrnHasTwoDbtUpstreams() throws Exception { + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); + final UpstreamArray upstreamArray = new UpstreamArray(); + Upstream dbtUpstream1 = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity1,PROD)", DatasetLineageType.TRANSFORMED); + Upstream dbtUpstream2 = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity2,PROD)", DatasetLineageType.TRANSFORMED); + upstreamArray.add(dbtUpstream1); + upstreamArray.add(dbtUpstream2); + upstreamLineage.setUpstreams(upstreamArray); + + event.setAspect(GenericRecordUtils.serializeAspect(upstreamLineage)); + event.setEntityUrn(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); + _siblingAssociationHook.invoke(event); + + + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( + 
Mockito.any(), + Mockito.eq(_mockAuthentication) + ); + + + } + + @Test + public void testInvokeWhenSourceUrnHasTwoUpstreamsOneDbt() throws Exception { + + MetadataChangeLog event = createEvent(DATASET_ENTITY_NAME, UPSTREAM_LINEAGE_ASPECT_NAME, ChangeType.UPSERT); + final UpstreamLineage upstreamLineage = new UpstreamLineage(); + final UpstreamArray upstreamArray = new UpstreamArray(); + Upstream dbtUpstream = createUpstream("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.source_entity1,PROD)", DatasetLineageType.TRANSFORMED); + Upstream snowflakeUpstream = + createUpstream("urn:li:dataset:(urn:li:dataPlatform:snowflake,my-proj.jaffle_shop.customers,PROD)", DatasetLineageType.TRANSFORMED); + upstreamArray.add(dbtUpstream); + upstreamArray.add(snowflakeUpstream); + upstreamLineage.setUpstreams(upstreamArray); + + event.setAspect(GenericRecordUtils.serializeAspect(upstreamLineage)); + event.setEntityUrn(Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)")); + _siblingAssociationHook.invoke(event); + + + Mockito.verify(_mockEntityClient, Mockito.times(2)).ingestProposal( + Mockito.any(), + Mockito.eq(_mockAuthentication) + ); + + + } + + private MetadataChangeLog createEvent(String entityType, String aspectName, ChangeType changeType) { + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(entityType); + event.setAspectName(aspectName); + event.setChangeType(changeType); + return event; + } + private Upstream createUpstream(String urn, DatasetLineageType upstreamType) { + + final Upstream upstream = new Upstream(); + upstream.setType(upstreamType); + try { + upstream.setDataset(DatasetUrn.createFromString(urn)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + + return upstream; + } + + + } From e9b4727c8e270d22c80c4be7133a3315adbc5691 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Wed, 13 Sep 2023 11:18:52 -0400 Subject: [PATCH 007/156] fix(java) Update DataProductMapper to always return a name (#8832) --- .../types/dataproduct/mappers/DataProductMapper.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataproduct/mappers/DataProductMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataproduct/mappers/DataProductMapper.java index 9cb6840067e7b..254b43ecb96cc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataproduct/mappers/DataProductMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataproduct/mappers/DataProductMapper.java @@ -50,7 +50,8 @@ public DataProduct apply(@Nonnull final EntityResponse entityResponse) { EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); - mappingHelper.mapToResult(DATA_PRODUCT_PROPERTIES_ASPECT_NAME, this::mapDataProductProperties); + mappingHelper.mapToResult(DATA_PRODUCT_PROPERTIES_ASPECT_NAME, (dataProduct, dataMap) -> + mapDataProductProperties(dataProduct, dataMap, entityUrn)); mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (dataProduct, dataMap) -> dataProduct.setTags(GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (dataProduct, dataMap) -> @@ -65,11 +66,12 @@ public DataProduct apply(@Nonnull final EntityResponse entityResponse) { return result; } - private void mapDataProductProperties(@Nonnull 
DataProduct dataProduct, @Nonnull DataMap dataMap) { + private void mapDataProductProperties(@Nonnull DataProduct dataProduct, @Nonnull DataMap dataMap, @Nonnull Urn urn) { DataProductProperties dataProductProperties = new DataProductProperties(dataMap); com.linkedin.datahub.graphql.generated.DataProductProperties properties = new com.linkedin.datahub.graphql.generated.DataProductProperties(); - properties.setName(dataProductProperties.getName()); + final String name = dataProductProperties.hasName() ? dataProductProperties.getName() : urn.getId(); + properties.setName(name); properties.setDescription(dataProductProperties.getDescription()); if (dataProductProperties.hasExternalUrl()) { properties.setExternalUrl(dataProductProperties.getExternalUrl().toString()); From 1474ac01b19f47d1011dc836f0fceeb59bd1720d Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 13 Sep 2023 12:32:45 -0700 Subject: [PATCH 008/156] build(ingest): Bump jsonschema for Python >= 3.8 (#8836) --- metadata-ingestion/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 09f71fa769fd3..7a5fd355803cb 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -58,7 +58,8 @@ def get_long_description(): "requests_file", "jsonref", # jsonschema drops python 3.7 support in v4.18.0 - "jsonschema<=4.17.3", + "jsonschema<=4.17.3 ; python_version < '3.8'", + "jsonschema>=4.18.0 ; python_version >= '3.8'", "ruamel.yaml", } From 493d31531a1ed829adc106ea7722c88c50b70270 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 13 Sep 2023 14:00:58 -0700 Subject: [PATCH 009/156] feat(ingest/rest-emitter): Do not raise error on retry failure to get better error messages (#8837) --- metadata-ingestion/src/datahub/emitter/rest_emitter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index acb5763280905..937e0902d6d8c 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -120,11 +120,15 @@ def __init__( self._retry_max_times = retry_max_times try: + # Set raise_on_status to False to propagate errors: + # https://stackoverflow.com/questions/70189330/determine-status-code-from-python-retry-exception + # Must call `raise_for_status` after making a request, which we do retry_strategy = Retry( total=self._retry_max_times, status_forcelist=self._retry_status_codes, backoff_factor=2, allowed_methods=self._retry_methods, + raise_on_status=False, ) except TypeError: # Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`. 
@@ -133,6 +137,7 @@ def __init__( status_forcelist=self._retry_status_codes, backoff_factor=2, method_whitelist=self._retry_methods, + raise_on_status=False, ) adapter = HTTPAdapter( From 31abf383d13538cdb2fdb3b89ca3ca1fe6b1590f Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 14 Sep 2023 11:34:21 +0900 Subject: [PATCH 010/156] ci: add markdown-link-check (#8771) --- README.md | 6 +- docs-website/build.gradle | 2 +- docs-website/markdown-link-check-config.json | 50 +++++++ docs-website/package.json | 3 +- docs-website/yarn.lock | 122 ++++++++++++++++-- docs/advanced/no-code-modeling.md | 7 +- docs/api/graphql/how-to-set-up-graphql.md | 2 +- docs/architecture/architecture.md | 2 +- docs/authentication/guides/add-users.md | 8 +- .../guides/sso/configure-oidc-react.md | 2 +- docs/cli.md | 2 +- docs/domains.md | 19 ++- docs/how/add-new-aspect.md | 10 +- docs/modeling/extending-the-metadata-model.md | 10 +- docs/modeling/metadata-model.md | 4 +- docs/tags.md | 10 +- docs/townhall-history.md | 2 +- docs/what/gms.md | 4 +- docs/what/mxe.md | 2 +- docs/what/relationship.md | 3 - docs/what/search-document.md | 1 - .../add_stateful_ingestion_to_source.md | 13 +- .../docs/dev_guides/reporting_telemetry.md | 2 +- .../docs/dev_guides/stateful.md | 16 +-- metadata-ingestion/docs/sources/gcs/README.md | 4 +- .../docs/sources/kafka-connect/README.md | 10 +- metadata-ingestion/docs/sources/s3/README.md | 4 +- .../examples/transforms/README.md | 2 +- .../source/usage/starburst_trino_usage.py | 3 - metadata-jobs/README.md | 4 +- metadata-models/docs/entities/dataPlatform.md | 4 +- 31 files changed, 236 insertions(+), 97 deletions(-) create mode 100644 docs-website/markdown-link-check-config.json diff --git a/README.md b/README.md index 951dcebad6498..79f85433fbc18 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ export const Logo = (props) => {
DataHub Logo
@@ -18,7 +18,7 @@ export const Logo = (props) => {

-DataHub +DataHub

@@ -156,7 +156,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [DataHub Blog](https://blog.datahubproject.io/) - [DataHub YouTube Channel](https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w) -- [Optum: Data Mesh via DataHub](https://optum.github.io/blog/2022/03/23/data-mesh-via-datahub/) +- [Optum: Data Mesh via DataHub](https://opensource.optum.com/blog/2022/03/23/data-mesh-via-datahub) - [Saxo Bank: Enabling Data Discovery in Data Mesh](https://medium.com/datahub-project/enabling-data-discovery-in-a-data-mesh-the-saxo-journey-451b06969c8f) - [Bringing The Power Of The DataHub Real-Time Metadata Graph To Everyone At Acryl Data](https://www.dataengineeringpodcast.com/acryl-data-datahub-metadata-graph-episode-230/) - [DataHub: Popular Metadata Architectures Explained](https://engineering.linkedin.com/blog/2020/datahub-popular-metadata-architectures-explained) diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 370ae3eec9176..a213ec1ae8194 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -89,7 +89,7 @@ task fastReload(type: YarnTask) { args = ['run', 'generate-rsync'] } -task yarnLint(type: YarnTask, dependsOn: [yarnInstall]) { +task yarnLint(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { inputs.files(projectMdFiles) args = ['run', 'lint-check'] outputs.dir("dist") diff --git a/docs-website/markdown-link-check-config.json b/docs-website/markdown-link-check-config.json new file mode 100644 index 0000000000000..26e040edde6f7 --- /dev/null +++ b/docs-website/markdown-link-check-config.json @@ -0,0 +1,50 @@ +{ + "ignorePatterns": [ + { + "pattern": "^http://demo\\.datahubproject\\.io" + }, + { + "pattern": "^http://localhost" + }, + { + "pattern": "^http://www.famfamfam.com" + }, + { + "pattern": "^http://www.linkedin.com" + }, + { + "pattern": "\\.md$" + }, + { + "pattern":"\\.json$" + }, + { + "pattern":"\\.txt$" + }, + { + "pattern": "\\.java$" + }, + { + "pattern": "\\.md#.*$" + }, + { + "pattern": "^https://oauth2.googleapis.com/token" + }, + { + "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" + }, + { + "pattern": "#v(\\d+)-(\\d+)-(\\d+)" + }, + { + "pattern": "^https://github.com/mohdsiddique$" + }, + { + "pattern": "^https://github.com/2x$" + }, + { + "pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" + } + ], + "aliveStatusCodes": [200, 206, 0, 999, 400, 401, 403] +} \ No newline at end of file diff --git a/docs-website/package.json b/docs-website/package.json index 400ef4143c786..1722f92169692 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -17,7 +17,7 @@ "generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs", "generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs", "lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js", - "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js", + "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js && find ./genDocs -name \\*.md -not -path \"./genDocs/python-sdk/models.md\" -print0 | xargs -0 -n1 markdown-link-check -p -q -c markdown-link-check-config.json", "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js" }, "dependencies": { @@ -37,6 +37,7 @@ "docusaurus-graphql-plugin": "0.5.0", 
"docusaurus-plugin-sass": "^0.2.1", "dotenv": "^16.0.1", + "markdown-link-check": "^3.11.2", "markprompt": "^0.1.7", "react": "^18.2.0", "react-dom": "18.2.0", diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 0613fe71ef78e..5698029bff70a 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -3414,6 +3414,11 @@ async-validator@^4.1.0: resolved "https://registry.yarnpkg.com/async-validator/-/async-validator-4.2.5.tgz#c96ea3332a521699d0afaaceed510a54656c6339" integrity sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg== +async@^3.2.4: + version "3.2.4" + resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c" + integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ== + asynckit@^0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" @@ -3765,6 +3770,11 @@ chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2: ansi-styles "^4.1.0" supports-color "^7.1.0" +chalk@^5.2.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-5.3.0.tgz#67c20a7ebef70e7f3970a01f90fa210cb6860385" + integrity sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w== + character-entities-legacy@^1.0.0: version "1.1.4" resolved "https://registry.yarnpkg.com/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz#94bc1845dce70a5bb9d2ecc748725661293d8fc1" @@ -3797,7 +3807,7 @@ cheerio-select@^2.1.0: domhandler "^5.0.3" domutils "^3.0.1" -cheerio@^1.0.0-rc.12: +cheerio@^1.0.0-rc.10, cheerio@^1.0.0-rc.12: version "1.0.0-rc.12" resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== @@ -3984,6 +3994,11 @@ comma-separated-tokens@^2.0.0: resolved "https://registry.yarnpkg.com/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz#4e89c9458acb61bc8fef19f4529973b2392839ee" integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg== +commander@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06" + integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug== + commander@^2.20.0: version "2.20.3" resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" @@ -4385,6 +4400,13 @@ debug@4, debug@^4.0.0, debug@^4.1.0, debug@^4.1.1: dependencies: ms "2.1.2" +debug@^3.2.6: + version "3.2.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" + integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== + dependencies: + ms "^2.1.1" + decode-named-character-reference@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz#daabac9690874c394c81e4162a0304b35d824f0e" @@ -5551,6 +5573,13 @@ html-entities@^2.3.2: resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061" integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ== +html-link-extractor@^1.0.5: + 
version "1.0.5" + resolved "https://registry.yarnpkg.com/html-link-extractor/-/html-link-extractor-1.0.5.tgz#a4be345cb13b8c3352d82b28c8b124bb7bf5dd6f" + integrity sha512-ADd49pudM157uWHwHQPUSX4ssMsvR/yHIswOR5CUfBdK9g9ZYGMhVSE6KZVHJ6kCkR0gH4htsfzU6zECDNVwyw== + dependencies: + cheerio "^1.0.0-rc.10" + html-minifier-terser@^6.0.2, html-minifier-terser@^6.1.0: version "6.1.0" resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz#bfc818934cc07918f6b3669f5774ecdfd48f32ab" @@ -5673,6 +5702,13 @@ iconv-lite@0.4.24: dependencies: safer-buffer ">= 2.1.2 < 3" +iconv-lite@^0.6.3: + version "0.6.3" + resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501" + integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw== + dependencies: + safer-buffer ">= 2.1.2 < 3.0.0" + icss-utils@^5.0.0, icss-utils@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-5.1.0.tgz#c6be6858abd013d768e98366ae47e25d5887b1ae" @@ -5795,6 +5831,11 @@ ipaddr.js@^2.0.1: resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-2.1.0.tgz#2119bc447ff8c257753b196fc5f1ce08a4cdf39f" integrity sha512-LlbxQ7xKzfBusov6UMi4MFpEg0m+mAm9xyNGEduwXMEDuf4WfzB/RZwMVYEd7IKGvh4IUkEXYxtAVu9T3OelJQ== +is-absolute-url@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-4.0.1.tgz#16e4d487d4fded05cfe0685e53ec86804a5e94dc" + integrity sha512-/51/TKE88Lmm7Gc4/8btclNXWS+g50wXhYJq8HWIBAGUBnoAdRu1aXeh364t/O7wXDAcTJDP8PNuNKWUDWie+A== + is-alphabetical@1.0.4, is-alphabetical@^1.0.0: version "1.0.4" resolved "https://registry.yarnpkg.com/is-alphabetical/-/is-alphabetical-1.0.4.tgz#9e7d6b94916be22153745d184c298cbf986a686d" @@ -5963,6 +6004,13 @@ is-regexp@^1.0.0: resolved "https://registry.yarnpkg.com/is-regexp/-/is-regexp-1.0.0.tgz#fd2d883545c46bac5a633e7b9a09e87fa2cb5069" integrity sha512-7zjFAPO4/gwyQAAgRRmqeEeyIICSdmCqa3tsVHMdBzaXXRiqopZL4Cyghg/XulGWrtABTpbnYYzzIRffLkP4oA== +is-relative-url@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/is-relative-url/-/is-relative-url-4.0.0.tgz#4d8371999ff6033b76e4d9972fb5bf496fddfa97" + integrity sha512-PkzoL1qKAYXNFct5IKdKRH/iBQou/oCC85QhXj6WKtUQBliZ4Yfd3Zk27RHu9KQG8r6zgvAA2AQKC9p+rqTszg== + dependencies: + is-absolute-url "^4.0.1" + is-root@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/is-root/-/is-root-2.1.0.tgz#809e18129cf1129644302a4f8544035d51984a9c" @@ -6010,6 +6058,13 @@ isarray@~1.0.0: resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" integrity sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ== +isemail@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/isemail/-/isemail-3.2.0.tgz#59310a021931a9fb06bbb51e155ce0b3f236832c" + integrity sha512-zKqkK+O+dGqevc93KNsbZ/TqTUFd46MwWjYOoMrjIMZ51eU7DtQG3Wmd9SQQT7i7RVnuTPEiYEWHU3MSbxC1Tg== + dependencies: + punycode "2.x.x" + isexe@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" @@ -6205,6 +6260,16 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +link-check@^5.2.0: + version "5.2.0" + resolved 
"https://registry.yarnpkg.com/link-check/-/link-check-5.2.0.tgz#595a339d305900bed8c1302f4342a29c366bf478" + integrity sha512-xRbhYLaGDw7eRDTibTAcl6fXtmUQ13vkezQiTqshHHdGueQeumgxxmQMIOmJYsh2p8BF08t8thhDQ++EAOOq3w== + dependencies: + is-relative-url "^4.0.0" + isemail "^3.2.0" + ms "^2.1.3" + needle "^3.1.0" + loader-runner@^4.2.0: version "4.3.0" resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.3.0.tgz#c1b4a163b99f614830353b16755e7149ac2314e1" @@ -6366,6 +6431,28 @@ markdown-escapes@^1.0.0: resolved "https://registry.yarnpkg.com/markdown-escapes/-/markdown-escapes-1.0.4.tgz#c95415ef451499d7602b91095f3c8e8975f78535" integrity sha512-8z4efJYk43E0upd0NbVXwgSTQs6cT3T06etieCMEg7dRbzCbxUCK/GHlX8mhHRDcp+OLlHkPKsvqQTCvsRl2cg== +markdown-link-check@^3.11.2: + version "3.11.2" + resolved "https://registry.yarnpkg.com/markdown-link-check/-/markdown-link-check-3.11.2.tgz#303a8a03d4a34c42ef3158e0b245bced26b5d904" + integrity sha512-zave+vI4AMeLp0FlUllAwGbNytSKsS3R2Zgtf3ufVT892Z/L6Ro9osZwE9PNA7s0IkJ4onnuHqatpsaCiAShJw== + dependencies: + async "^3.2.4" + chalk "^5.2.0" + commander "^10.0.1" + link-check "^5.2.0" + lodash "^4.17.21" + markdown-link-extractor "^3.1.0" + needle "^3.2.0" + progress "^2.0.3" + +markdown-link-extractor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/markdown-link-extractor/-/markdown-link-extractor-3.1.0.tgz#0d5a703630d791a9e2017449e1a9b294f2d2b676" + integrity sha512-r0NEbP1dsM+IqB62Ru9TXLP/HDaTdBNIeylYXumuBi6Xv4ufjE1/g3TnslYL8VNqNcGAGbMptQFHrrdfoZ/Sug== + dependencies: + html-link-extractor "^1.0.5" + marked "^4.1.0" + markdown-table@^3.0.0: version "3.0.3" resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd" @@ -6376,6 +6463,11 @@ marked@^2.0.3: resolved "https://registry.yarnpkg.com/marked/-/marked-2.1.3.tgz#bd017cef6431724fd4b27e0657f5ceb14bff3753" integrity sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA== +marked@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3" + integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A== + markprompt@^0.1.7: version "0.1.7" resolved "https://registry.yarnpkg.com/markprompt/-/markprompt-0.1.7.tgz#fa049e11109d93372c45c38b3ca40bd5fdf751ea" @@ -6978,7 +7070,7 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -ms@2.1.3: +ms@2.1.3, ms@^2.1.1, ms@^2.1.3: version "2.1.3" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== @@ -7001,6 +7093,15 @@ napi-build-utils@^1.0.1: resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806" integrity sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg== +needle@^3.1.0, needle@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/needle/-/needle-3.2.0.tgz#07d240ebcabfd65c76c03afae7f6defe6469df44" + integrity sha512-oUvzXnyLiVyVGoianLijF9O/RecZUf7TkBfimjGrLM4eQhXyeJwM6GeAWccwfQ9aa4gMCZKqhAOuLaMIcQxajQ== + dependencies: + debug "^3.2.6" + iconv-lite "^0.6.3" + sax "^1.2.4" + 
negotiator@0.6.3: version "0.6.3" resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" @@ -7753,6 +7854,11 @@ process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== +progress@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" + integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== + promise@^7.1.1: version "7.3.1" resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" @@ -7805,16 +7911,16 @@ pump@^3.0.0: end-of-stream "^1.1.0" once "^1.3.1" +punycode@2.x.x, punycode@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" + integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== + punycode@^1.3.2: version "1.4.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e" integrity sha512-jmYNElW7yvO7TV33CjSmvSiE2yco3bV2czu/OzDKdMNVZQWfxCblURLhf+47syQRBntjfLdd/H0egrzIG+oaFQ== -punycode@^2.1.0: - version "2.3.0" - resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" - integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== - pupa@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/pupa/-/pupa-2.1.1.tgz#f5e8fd4afc2c5d97828faa523549ed8744a20d62" @@ -8789,7 +8895,7 @@ safe-buffer@5.2.1, safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== -"safer-buffer@>= 2.1.2 < 3": +"safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== diff --git a/docs/advanced/no-code-modeling.md b/docs/advanced/no-code-modeling.md index d76b776d3dddb..172e63f821eab 100644 --- a/docs/advanced/no-code-modeling.md +++ b/docs/advanced/no-code-modeling.md @@ -100,10 +100,9 @@ Currently, there are various models in GMS: 1. [Urn](https://github.com/datahub-project/datahub/blob/master/li-utils/src/main/pegasus/com/linkedin/common/DatasetUrn.pdl) - Structs composing primary keys 2. [Root] [Snapshots](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl) - Container of aspects 3. [Aspects](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DashboardAspect.pdl) - Optional container of fields -4. 
[Values](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/Dataset.pdl), [Keys](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing) -5. [Entities](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl) - Records with fields derived from the URN. Used only in graph / relationships -6. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl) - Edges between 2 entities with optional edge properties -7. [Search Documents](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl) - Flat documents for indexing within Elastic index +4. [Keys](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing) +5. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/EntityRelationship.pdl) - Edges between 2 entities with optional edge properties +6. Search Documents - Flat documents for indexing within Elastic index - And corresponding index [mappings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/mappings.json), [settings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/settings.json) Various components of GMS depend on / make assumptions about these model types: diff --git a/docs/api/graphql/how-to-set-up-graphql.md b/docs/api/graphql/how-to-set-up-graphql.md index 584bf34ad3f92..2be2f935b12b1 100644 --- a/docs/api/graphql/how-to-set-up-graphql.md +++ b/docs/api/graphql/how-to-set-up-graphql.md @@ -68,7 +68,7 @@ In the request body, select the `GraphQL` option and enter your GraphQL query in

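If you want to script the same request instead of using Postman, the call is small enough to issue directly against the GraphQL endpoint. A minimal sketch, assuming the frontend proxy is reachable at `http://localhost:9002/api/graphql` and that you already have a personal access token (both are placeholder assumptions):

```python
import requests

GRAPHQL_ENDPOINT = "http://localhost:9002/api/graphql"  # placeholder address
TOKEN = "<personal-access-token>"  # placeholder token

# A tiny query that echoes back the authenticated user, useful as a smoke test.
query = """
query {
  me {
    corpUser {
      username
    }
  }
}
"""

response = requests.post(
    GRAPHQL_ENDPOINT,
    json={"query": query},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
response.raise_for_status()
print(response.json())
```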
-Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql/) in the Postman documentation for more information. +Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql-overview/) in the Postman documentation for more information. ### Authentication + Authorization diff --git a/docs/architecture/architecture.md b/docs/architecture/architecture.md index 6a9c1860d71b0..20f18f09d949b 100644 --- a/docs/architecture/architecture.md +++ b/docs/architecture/architecture.md @@ -17,7 +17,7 @@ The figures below describe the high-level architecture of DataHub.


diff --git a/docs/authentication/guides/add-users.md b/docs/authentication/guides/add-users.md index f5dfc83201083..d380cacd6665e 100644 --- a/docs/authentication/guides/add-users.md +++ b/docs/authentication/guides/add-users.md @@ -19,13 +19,13 @@ To do so, navigate to the **Users & Groups** section inside of Settings page. He do not have the correct privileges to invite users, this button will be disabled.


To invite new users, simply share the link with others inside your organization.


When a new user visits the link, they will be directed to a sign up screen where they can create their DataHub account. @@ -37,13 +37,13 @@ and click **Reset user password** inside the menu dropdown on the right hand sid `Manage User Credentials` [Platform Privilege](../../authorization/access-policies-guide.md) in order to reset passwords.


To reset the password, simply share the password reset link with the user who needs to change their password. Password reset links expire after 24 hours.


# Configuring Single Sign-On with OpenID Connect diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md index d27792ce3967b..512d6adbf916f 100644 --- a/docs/authentication/guides/sso/configure-oidc-react.md +++ b/docs/authentication/guides/sso/configure-oidc-react.md @@ -26,7 +26,7 @@ please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS To configure OIDC in React, you will most often need to register yourself as a client with your identity provider (Google, Okta, etc). Each provider may have their own instructions. Provided below are links to examples for Okta, Google, Azure AD, & Keycloak. -- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/apple/register-app-in-okta/) +- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/) - [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect) - [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc) - [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/) diff --git a/docs/cli.md b/docs/cli.md index eb8bb406b0107..267f289d9f54a 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -547,7 +547,7 @@ Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_event ### Using docker [![Docker Hub](https://img.shields.io/docker/pulls/acryldata/datahub-ingestion?style=plastic)](https://hub.docker.com/r/acryldata/datahub-ingestion) -[![datahub-ingestion docker](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) +[![datahub-ingestion docker](https://github.com/acryldata/datahub/workflows/datahub-ingestion%20docker/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) If you don't want to install locally, you can alternatively run metadata ingestion within a Docker container. We have prebuilt images available on [Docker hub](https://hub.docker.com/r/acryldata/datahub-ingestion). All plugins will be installed and enabled automatically. diff --git a/docs/domains.md b/docs/domains.md index c846a753417c5..1b2ebc9d47f39 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -22,20 +22,20 @@ You can create this privileges by creating a new [Metadata Policy](./authorizati To create a Domain, first navigate to the **Domains** tab in the top-right menu of DataHub.


Once you're on the Domains page, you'll see a list of all the Domains that have been created on DataHub. Additionally, you can view the number of entities inside each Domain.


To create a new Domain, click '+ New Domain'.


Inside the form, you can choose a name for your Domain. Most often, this will align with your business units or groups, for example @@ -48,7 +48,7 @@ for the Domain. This option is useful if you intend to refer to Domains by a com key to be human-readable. Proceed with caution: once you select a custom id, it cannot be easily changed.


By default, you don't need to worry about this. DataHub will auto-generate a unique Domain id for you. @@ -64,7 +64,7 @@ To assign an asset to a Domain, simply navigate to the asset's profile page. At see a 'Domain' section. Click 'Set Domain', and then search for the Domain you'd like to add to. When you're done, click 'Add'.
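Assigning a Domain does not have to go through the UI. A minimal sketch with the Python REST emitter is shown below; the dataset URN, Domain URN, and server address are placeholder assumptions, and emitting the `domains` aspect replaces whatever Domain was previously set on the asset:

```python
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import DomainsClass

# Placeholder identifiers; swap in your own dataset and Domain.
dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
domain_urn = "urn:li:domain:marketing"

# The domains aspect is simply the list of Domain URNs attached to the entity;
# a data asset is expected to carry a single Domain.
mcp = MetadataChangeProposalWrapper(
    entityUrn=dataset_urn,
    aspect=DomainsClass(domains=[domain_urn]),
)

DatahubRestEmitter("http://localhost:8080").emit(mcp)  # assumes a locally reachable GMS
```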


To remove an asset from a Domain, click the 'x' icon on the Domain tag. @@ -149,27 +149,27 @@ source: Once you've created a Domain, you can use the search bar to find it.


Clicking on the search result will take you to the Domain's profile, where you can edit its description, add / remove owners, and view the assets inside the Domain.


Once you've added assets to a Domain, you can filter search results to limit to those Assets within a particular Domain using the left-side search filters.


On the homepage, you'll also find a list of the most popular Domains in your organization.


## Additional Resources @@ -242,7 +242,6 @@ DataHub supports Tags, Glossary Terms, & Domains as distinct types of Metadata t - **Tags**: Informal, loosely controlled labels that serve as a tool for search & discovery. Assets may have multiple tags. No formal, central management. - **Glossary Terms**: A controlled vocabulary, with optional hierarchy. Terms are typically used to standardize types of leaf-level attributes (i.e. schema fields) for governance. E.g. (EMAIL_PLAINTEXT) - **Domains**: A set of top-level categories. Usually aligned to business units / disciplines to which the assets are most relevant. Central or distributed management. Single Domain assignment per data asset. - *Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!* ### Related Features diff --git a/docs/how/add-new-aspect.md b/docs/how/add-new-aspect.md index 6ea7256ed75cc..d1fe567018903 100644 --- a/docs/how/add-new-aspect.md +++ b/docs/how/add-new-aspect.md @@ -1,20 +1,20 @@ # How to add a new metadata aspect? Adding a new metadata [aspect](../what/aspect.md) is one of the most common ways to extend an existing [entity](../what/entity.md). -We'll use the [CorpUserEditableInfo](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserEditableInfo.pdl) as an example here. +We'll use the CorpUserEditableInfo as an example here. 1. Add the aspect model to the corresponding namespace (e.g. [`com.linkedin.identity`](https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/identity)) -2. Extend the entity's aspect union to include the new aspect (e.g. [`CorpUserAspect`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/CorpUserAspect.pdl)) +2. Extend the entity's aspect union to include the new aspect. 3. Rebuild the rest.li [IDL & snapshot](https://linkedin.github.io/rest.li/modeling/compatibility_check) by running the following command from the project root ``` ./gradlew :metadata-service:restli-servlet-impl:build -Prest.model.compatibility=ignore ``` -4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model (e.g. [`CorpUser`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl)) with an optional field (e.g. [`editableInfo`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl#L21)). You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models. +4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model with an optional field. You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models. -5. 
(Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. [`CorpUsersEditableInfoResource`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsersEditableInfoResource.java)). The sub-resource endpiont also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp. +5. (Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. `CorpUsersEditableInfoResource`). The sub-resource endpiont also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp. -6. After rebuilding & restarting [gms](https://github.com/datahub-project/datahub/tree/master/gms), [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job), +6. After rebuilding & restarting gms, [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job),z you should be able to start emitting [MCE](../what/mxe.md) with the new aspect and have it automatically ingested & stored in DB. diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index 98f70f6d933e4..be2d7d795de70 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -24,7 +24,7 @@ We will refer to the two options as the **open-source fork** and **custom reposi ## This Guide This guide will outline what the experience of adding a new Entity should look like through a real example of adding the -Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step_3). +Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity). At a high level, an entity is made up of: @@ -82,14 +82,14 @@ Because they are aspects, keys need to be annotated with an @Aspect annotation, can be a part of. The key can also be annotated with the two index annotations: @Relationship and @Searchable. This instructs DataHub -infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step_3) for more details on +infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity) for more details on the annotation model. **Constraints**: Note that each field in a Key Aspect MUST be of String or Enum type. ### Step 2: Create the new entity with its key aspect -Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step_4) and [5](#step_5). +Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. 
More on that in steps [4](#step-4-choose-a-place-to-store-your-model-extension) and [5](#step-5-attaching-your-non-key-aspects-to-the-entity). Example: ```yaml @@ -212,11 +212,11 @@ After you create your Aspect, you need to attach to all the entities that it app **Constraints**: Note that all aspects MUST be of type Record. -### Step 4: Choose a place to store your model extension +### Step 4: Choose a place to store your model extension At the beginning of this document, we walked you through a flow-chart that should help you decide whether you need to maintain a fork of the open source DataHub repo for your model extensions, or whether you can just use a model extension repository that can stay independent of the DataHub repo. Depending on what path you took, the place you store your aspect model files (the .pdl files) and the entity-registry files (the yaml file called `entity-registry.yaml` or `entity-registry.yml`) will vary. -- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step_5). +- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step-5-attaching-your-non-key-aspects-to-the-entity). - Custom repository: Read the [metadata-models-custom](../../metadata-models-custom/README.md) documentation to learn how to store and version your aspect models and registry. ### Step 5: Attaching your non-key Aspect(s) to the Entity diff --git a/docs/modeling/metadata-model.md b/docs/modeling/metadata-model.md index 037c9c7108a6e..a8958985a0a72 100644 --- a/docs/modeling/metadata-model.md +++ b/docs/modeling/metadata-model.md @@ -433,7 +433,7 @@ aggregation query against a timeseries aspect. The *@TimeseriesField* and the *@TimeseriesFieldCollection* are two new annotations that can be attached to a field of a *Timeseries aspect* that allows it to be part of an aggregatable query. The kinds of aggregations allowed on these annotated fields depends on the type of the field, as well as the kind of aggregation, as -described [here](#Performing-an-aggregation-on-a-Timeseries-aspect). +described [here](#performing-an-aggregation-on-a-timeseries-aspect). * `@TimeseriesField = {}` - this annotation can be used with any type of non-collection type field of the aspect such as primitive types and records (see the fields *stat*, *strStat* and *strArray* fields @@ -515,7 +515,7 @@ my_emitter = DatahubRestEmitter("http://localhost:8080") my_emitter.emit(mcpw) ``` -###### Performing an aggregation on a Timeseries aspect. +###### Performing an aggregation on a Timeseries aspect Aggreations on timeseries aspects can be performed by the GMS REST API for `/analytics?action=getTimeseriesStats` which accepts the following params. diff --git a/docs/tags.md b/docs/tags.md index 945b514dc7b47..cb08c9fafea49 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -27,25 +27,25 @@ You can create these privileges by creating a new [Metadata Policy](./authorizat To add a tag at the dataset or container level, simply navigate to the page for that entity and click on the **Add Tag** button.


Type in the name of the tag you want to add. You can add a new tag, or add a tag that already exists (the autocomplete will pull up the tag if it already exists).


Click on the "Add" button and you'll see the tag has been added!


If you would like to add a tag at the schema level, hover over the "Tags" column for a schema until the "Add Tag" button shows up, and then follow the same flow as above.
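Tags can also be attached outside the UI with the Python REST emitter. A minimal sketch is below; the dataset URN, tag name, and server address are placeholder assumptions, and emitting `globalTags` this way overwrites the existing tag list rather than appending to it:

```python
from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass

# Placeholder identifiers; swap in your own dataset and tag.
dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
tag_urn = make_tag_urn("Legacy")

# globalTags is emitted as a whole; read-modify-write it if tags may already exist.
mcp = MetadataChangeProposalWrapper(
    entityUrn=dataset_urn,
    aspect=GlobalTagsClass(tags=[TagAssociationClass(tag=tag_urn)]),
)

DatahubRestEmitter("http://localhost:8080").emit(mcp)  # assumes a locally reachable GMS
```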


### Removing a Tag @@ -57,7 +57,7 @@ To remove a tag, simply click on the "X" button in the tag. Then click "Yes" whe You can search for a tag in the search bar, and even filter entities by the presence of a specific tag.


## Additional Resources diff --git a/docs/townhall-history.md b/docs/townhall-history.md index e235a70c5d7b9..d92905af0cd72 100644 --- a/docs/townhall-history.md +++ b/docs/townhall-history.md @@ -328,7 +328,7 @@ November Town Hall (in December!) * Welcome - 5 mins * Latest React App Demo! ([video](https://www.youtube.com/watch?v=RQBEJhcen5E)) by John Joyce and Gabe Lyons - 5 mins -* Use-Case: DataHub at Geotab ([slides](https://docs.google.com/presentation/d/1qcgO3BW5NauuG0HnPqrxGcujsK-rJ1-EuU-7cbexkqE/edit?usp=sharing),[video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins +* Use-Case: DataHub at Geotab ([video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins * Tech Deep Dive: Tour of new pull-based Python Ingestion scripts ([slides](https://docs.google.com/presentation/d/15Xay596WDIhzkc5c8DEv6M-Bv1N4hP8quup1tkws6ms/edit#slide=id.gb478361595_0_10),[video](https://www.youtube.com/watch?v=u0IUQvG-_xI)) by [Harshal Sheth](https://www.linkedin.com/in/hsheth2/) - 15 mins * General Q&A from sign up sheet, slack, and participants - 15 mins * Closing remarks - 5 mins diff --git a/docs/what/gms.md b/docs/what/gms.md index 9e1cea1b9540e..a39450d28ae83 100644 --- a/docs/what/gms.md +++ b/docs/what/gms.md @@ -2,6 +2,4 @@ Metadata for [entities](entity.md) [onboarded](../modeling/metadata-model.md) to [GMA](gma.md) is served through microservices known as Generalized Metadata Service (GMS). GMS typically provides a [Rest.li](http://rest.li) API and must access the metadata using [GMA DAOs](../architecture/metadata-serving.md). -While a GMS is completely free to define its public APIs, we do provide a list of [resource base classes](https://github.com/datahub-project/datahub-gma/tree/master/restli-resources/src/main/java/com/linkedin/metadata/restli) to leverage for common patterns. - -GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS ([datahub-gms](../../gms)) that serves all entities. +GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS that serves all entities. diff --git a/docs/what/mxe.md b/docs/what/mxe.md index 8af96360858a3..25294e04ea3d9 100644 --- a/docs/what/mxe.md +++ b/docs/what/mxe.md @@ -266,7 +266,7 @@ A Metadata Change Event represents a request to change multiple aspects for the It leverages a deprecated concept of `Snapshot`, which is a strongly-typed list of aspects for the same entity. -A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event), which is conveying a committed change. +A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event-mae), which is conveying a committed change. Consequently, only successfully accepted and processed MCEs will lead to the emission of a corresponding MAE / MCLs. ### Emission diff --git a/docs/what/relationship.md b/docs/what/relationship.md index dcfe093a1b124..d5348dc04b3c0 100644 --- a/docs/what/relationship.md +++ b/docs/what/relationship.md @@ -102,9 +102,6 @@ For one, the actual direction doesn’t really impact the execution of graph que That being said, generally there’s a more "natural way" to specify the direction of a relationship, which closely relate to how the metadata is stored. 
For example, the membership information for an LDAP group is generally stored as a list in group’s metadata. As a result, it’s more natural to model a `HasMember` relationship that points from a group to a member, instead of a `IsMemberOf` relationship pointing from member to group. -Since all relationships are explicitly declared, it’s fairly easy for a user to discover what relationships are available and their directionality by inspecting -the [relationships directory](../../metadata-models/src/main/pegasus/com/linkedin/metadata/relationship). It’s also possible to provide a UI for the catalog of entities and relationships for analysts who are interested in building complex graph queries to gain insights into the metadata. - ## High Cardinality Relationships See [this doc](../advanced/high-cardinality.md) for suggestions on how to best model relationships with high cardinality. diff --git a/docs/what/search-document.md b/docs/what/search-document.md index 81359a55d0cae..bd27656e512c3 100644 --- a/docs/what/search-document.md +++ b/docs/what/search-document.md @@ -13,7 +13,6 @@ As a result, one may be tempted to add as many attributes as needed. This is acc Below shows an example schema for the `User` search document. Note that: 1. Each search document is required to have a type-specific `urn` field, generally maps to an entity in the [graph](graph.md). 2. Similar to `Entity`, each document has an optional `removed` field for "soft deletion". -This is captured in [BaseDocument](../../metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl), which is expected to be included by all documents. 3. Similar to `Entity`, all remaining fields are made `optional` to support partial updates. 4. `management` shows an example of a string array field. 5. `ownedDataset` shows an example on how a field can be derived from metadata [aspects](aspect.md) associated with other types of entity (in this case, `Dataset`). diff --git a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md index 6a1204fb0f2b3..9e39d24fb8578 100644 --- a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md +++ b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md @@ -60,16 +60,14 @@ class StaleEntityCheckpointStateBase(CheckpointStateBase, ABC, Generic[Derived]) ``` Examples: -1. [KafkaCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/kafka_state.py#L11). -2. [DbtCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/dbt_state.py#L16) -3. [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17) +* [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17) ### 2. Modifying the SourceConfig The source's config must inherit from `StatefulIngestionConfigBase`, and should declare a field named `stateful_ingestion` of type `Optional[StatefulStaleMetadataRemovalConfig]`. Examples: -1. 
The `KafkaSourceConfig` +- The `KafkaSourceConfig` ```python from typing import List, Optional import pydantic @@ -84,9 +82,6 @@ class KafkaSourceConfig(StatefulIngestionConfigBase): stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None ``` -2. The [DBTStatefulIngestionConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L131) - and the [DBTConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L317). - ### 3. Modifying the SourceReport The report class of the source should inherit from `StaleEntityRemovalSourceReport` whose definition is shown below. ```python @@ -102,7 +97,7 @@ class StaleEntityRemovalSourceReport(StatefulIngestionReport): ``` Examples: -1. The `KafkaSourceReport` +* The `KafkaSourceReport` ```python from dataclasses import dataclass from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalSourceReport @@ -110,7 +105,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEnt class KafkaSourceReport(StaleEntityRemovalSourceReport): # Date: Thu, 14 Sep 2023 11:40:38 +0530 Subject: [PATCH 011/156] docs(managed datahub): release notes 0.2.11 (#8830) --- docs-website/sidebars.js | 1 + .../managed-datahub/release-notes/v_0_2_11.md | 73 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 docs/managed-datahub/release-notes/v_0_2_11.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index fcf82b786a1b9..12691e9f8268a 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -597,6 +597,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_2_11", "docs/managed-datahub/release-notes/v_0_2_10", "docs/managed-datahub/release-notes/v_0_2_9", "docs/managed-datahub/release-notes/v_0_2_8", diff --git a/docs/managed-datahub/release-notes/v_0_2_11.md b/docs/managed-datahub/release-notes/v_0_2_11.md new file mode 100644 index 0000000000000..1f42090848712 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_2_11.md @@ -0,0 +1,73 @@ +# v0.2.11 +--- + +Release Availability Date +--- +14-Sep-2023 + +Recommended CLI/SDK +--- +- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.10.5.5 +- [Deprecation] In LDAP ingestor, the manager_pagination_enabled changed to general pagination_enabled + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + +Special Notes +--- +- Deployment process for this release is going to have a downtime when systme will be in a read only mode. A rough estimate 1 hour for every 2.3 million entities (includes soft-deleted entities). + + +## Release Changelog +--- +- Since `v0.2.10` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/2b0952195b7895df0a2bf92b28e71aac18217781...75252a3d9f6a576904be5a0790d644b9ae2df6ac have been pulled in. 
+- Misc fixes & features + - Proposals + - Group names shown correctly for proposal Inbox + - Metadata tests + - Deprecate/Un-deprecate actions available in Metadata tests + - Last Observed (in underlying sql) available as a filter in metadata tests + - [Breaking change] Renamed `__lastUpdated` -> `__created` as a filter to correctly represent what it was. This was not surfaced in the UI. But if you were using it then this needs to be renamed. Acryl Customer Success team will keep an eye out to pro-actively find and bring this up if you are affected by this. + - Robustness improvements to metadata test runs + - Copy urn for metadata tests to allow for easier filtering for iteration over metadata test results via our APIs. + - A lot more fixes to subscriptions, notifications and Observability (Beta). + - Some performance improvements to lineage queries + +## Some notable features in this SaaS release +- We now enable you to create and delete pinned announcements on your DataHub homepage! If you have the “Manage Home Page Posts” platform privilege you’ll see a new section in settings called “Home Page Posts” where you can create and delete text posts and link posts that your users see on the home page. +- Improvements to search experience +
+ -

- -**DataHub November 2022 Town Hall - Including Manual Lineage Demo** - -

- -

- -### GraphQL - -* [updateLineage](../../graphql/mutations.md#updatelineage) -* [searchAcrossLineage](../../graphql/queries.md#searchacrosslineage) -* [searchAcrossLineageInput](../../graphql/inputObjects.md#searchacrosslineageinput) - -#### Examples - -**Updating Lineage** - -```graphql -mutation updateLineage { - updateLineage(input: { - edgesToAdd: [ - { - downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", - upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)" - } - ], - edgesToRemove: [ - { - downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)", - upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" - } - ] - }) -} -``` - -### DataHub Blog - -* [Acryl Data introduces lineage support and automated propagation of governance information for Snowflake in DataHub](https://blog.datahubproject.io/acryl-data-introduces-lineage-support-and-automated-propagation-of-governance-information-for-339c99536561) -* [Data in Context: Lineage Explorer in DataHub](https://blog.datahubproject.io/data-in-context-lineage-explorer-in-datahub-a53a9a476dc4) -* [Harnessing the Power of Data Lineage with DataHub](https://blog.datahubproject.io/harnessing-the-power-of-data-lineage-with-datahub-ad086358dec4) - -## FAQ and Troubleshooting - -**The Lineage Tab is greyed out - why can’t I click on it?** - -This means you have not yet ingested lineage metadata for that entity. Please ingest lineage to proceed. - -**Are there any recommended practices for emitting lineage?** - -We recommend emitting aspects as MetadataChangeProposalWrapper over emitting them via the MetadataChangeEvent. - -*Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!* - -### Related Features - -* [DataHub Lineage Impact Analysis](../act-on-metadata/impact-analysis.md) diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py index b9f558011fc90..1a4db09e961ce 100644 --- a/metadata-ingestion/scripts/docgen.py +++ b/metadata-ingestion/scripts/docgen.py @@ -883,6 +883,150 @@ def generate( if metrics["plugins"].get("failed", 0) > 0: # type: ignore sys.exit(1) + ### Create Lineage doc + + source_dir = "../docs/generated/lineage" + os.makedirs(source_dir, exist_ok=True) + doc_file = f"{source_dir}/lineage-feature-guide.md" + with open(doc_file, "w+") as f: + f.write("import FeatureAvailability from '@site/src/components/FeatureAvailability';\n\n") + f.write(f"# About DataHub Lineage\n\n") + f.write("\n") + + f.write(""" +Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream. + +## Viewing Lineage + +You can view lineage under **Lineage** tab or **Lineage Visualization** screen. + +


+The UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the picker will not show you historical lineages. It will only filter the view of the latest version of the lineage. + +


+ + +:::tip The Lineage Tab is greyed out - why can’t I click on it? +This means you have not yet ingested lineage metadata for that entity. Please ingest lineage to proceed. + +::: + +## Adding Lineage + +### Ingestion Source + +If you're using an ingestion source that supports extraction of Lineage (e.g. **Table Lineage Capability**), then lineage information can be extracted automatically. +For detailed instructions, refer to the [source documentation](https://datahubproject.io/integrations) for the source you are using. + +### UI + +As of `v0.9.5`, DataHub supports the manual editing of lineage between entities. Data experts are free to add or remove upstream and downstream lineage edges in both the Lineage Visualization screen as well as the Lineage tab on entity pages. Use this feature to supplement automatic lineage extraction or establish important entity relationships in sources that do not support automatic extraction. Editing lineage by hand is supported for Datasets, Charts, Dashboards, and Data Jobs. +Please refer to our [UI Guides on Lineage](../../features/feature-guides/ui-lineage.md) for more information. + +:::caution Recommendation on UI-based lineage + +Lineage added by hand and programmatically may conflict with one another to cause unwanted overwrites. +It is strongly recommend that lineage is edited manually in cases where lineage information is not also extracted in automated fashion, e.g. by running an ingestion source. + +::: + +### API + +If you are not using a Lineage-support ingestion source, you can programmatically emit lineage edges between entities via API. +Please refer to [API Guides on Lineage](../../api/tutorials/lineage.md) for more information. + + +## Lineage Support + +### Automatic Lineage Extraction Support + +This is a summary of automatic lineage extraciton support in our data source. Please refer to the **Important Capabilities** table in the source documentation. Note that even if the source does not support automatic extraction, you can still add lineage manually using our API & SDKs.\n""") + + f.write("\n| Source | Table-Level Lineage | Column-Level Lineage | Related Configs |\n") + f.write("| ---------- | ------ | ----- |----- |\n") + + for platform_id, platform_docs in sorted( + source_documentation.items(), + key=lambda x: (x[1]["name"].casefold(), x[1]["name"]) + if "name" in x[1] + else (x[0].casefold(), x[0]), + ): + for plugin, plugin_docs in sorted( + platform_docs["plugins"].items(), + key=lambda x: str(x[1].get("doc_order")) + if x[1].get("doc_order") + else x[0], + ): + platform_name = platform_docs['name'] + if len(platform_docs["plugins"].keys()) > 1: + # We only need to show this if there are multiple modules. 
+ platform_name = f"{platform_name} `{plugin}`" + + # Initialize variables + table_level_supported = "❌" + column_level_supported = "❌" + config_names = '' + + if "capabilities" in plugin_docs: + plugin_capabilities = plugin_docs["capabilities"] + + for cap_setting in plugin_capabilities: + capability_text = get_capability_text(cap_setting.capability) + capability_supported = get_capability_supported_badge(cap_setting.supported) + + if capability_text == "Table-Level Lineage" and capability_supported == "✅": + table_level_supported = "✅" + + if capability_text == "Column-level Lineage" and capability_supported == "✅": + column_level_supported = "✅" + + if not (table_level_supported == "❌" and column_level_supported == "❌"): + if "config_schema" in plugin_docs: + config_properties = json.loads(plugin_docs['config_schema']).get('properties', {}) + config_names = '
'.join( + [f'- {property_name}' for property_name in config_properties if 'lineage' in property_name]) + lineage_not_applicable_sources = ['azure-ad', 'csv', 'demo-data', 'dynamodb', 'iceberg', 'json-schema', 'ldap', 'openapi', 'pulsar', 'sqlalchemy' ] + if platform_id not in lineage_not_applicable_sources : + f.write( + f"| [{platform_name}](../../generated/ingestion/sources/{platform_id}.md) | {table_level_supported} | {column_level_supported} | {config_names}|\n" + ) + + f.write(""" + +### Types of Lineage Connections + +Types of lineage connections supported in DataHub and the example codes are as follows. + +| Connection | Examples | A.K.A | +|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------| +| Dataset to Dataset | - [lineage_emitter_mcpw_rest.py](../../../metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py)
- [lineage_emitter_rest.py](../../../metadata-ingestion/examples/library/lineage_emitter_rest.py)
- [lineage_emitter_kafka.py](../../../metadata-ingestion/examples/library/lineage_emitter_kafka.py)
- [lineage_emitter_dataset_finegrained.py](../../../metadata-ingestion/examples/library/lineage_emitter_dataset_finegrained.py)
- [Datahub BigQuery Lineage](https://github.com/datahub-project/datahub/blob/a1bf95307b040074c8d65ebb86b5eb177fdcd591/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py#L229)
- [Datahub Snowflake Lineage](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py#L249) | +| DataJob to DataFlow | - [lineage_job_dataflow.py](../../../metadata-ingestion/examples/library/lineage_job_dataflow.py) | | +| DataJob to Dataset | - [lineage_dataset_job_dataset.py](../../../metadata-ingestion/examples/library/lineage_dataset_job_dataset.py)
| Pipeline Lineage | +| Chart to Dashboard | - [lineage_chart_dashboard.py](../../../metadata-ingestion/examples/library/lineage_chart_dashboard.py) | | +| Chart to Dataset | - [lineage_dataset_chart.py](../../../metadata-ingestion/examples/library/lineage_dataset_chart.py) | | + + +:::tip Our Roadmap +We're actively working on expanding lineage support for new data sources. +Visit our [Official Roadmap](https://feature-requests.datahubproject.io/roadmap) for upcoming updates! +::: + +## References + +- [DataHub Basics: Lineage 101](https://www.youtube.com/watch?v=rONGpsndzRw&t=1s) +- [DataHub November 2022 Town Hall](https://www.youtube.com/watch?v=BlCLhG8lGoY&t=1s) - Including Manual Lineage Demo +- [Acryl Data introduces lineage support and automated propagation of governance information for Snowflake in DataHub](https://blog.datahubproject.io/acryl-data-introduces-lineage-support-and-automated-propagation-of-governance-information-for-339c99536561) +- [Data in Context: Lineage Explorer in DataHub](https://blog.datahubproject.io/data-in-context-lineage-explorer-in-datahub-a53a9a476dc4) +- [Harnessing the Power of Data Lineage with DataHub](https://blog.datahubproject.io/harnessing-the-power-of-data-lineage-with-datahub-ad086358dec4) +- [DataHub Lineage Impact Analysis](https://datahubproject.io/docs/next/act-on-metadata/impact-analysis) + """) + + print("Lineage Documentation Generation Complete") if __name__ == "__main__": logger.setLevel("INFO") From c415d63ddae884de4e7a5d4ff3311f82057d3a78 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 4 Oct 2023 16:22:51 +0530 Subject: [PATCH 081/156] feat(ingestion/powerbi): column level lineage extraction for M-Query (#8796) --- .../docs/sources/powerbi/powerbi_pre.md | 2 +- .../ingestion/source/powerbi/config.py | 36 + .../powerbi/m_query/native_sql_parser.py | 6 +- .../source/powerbi/m_query/parser.py | 2 +- .../source/powerbi/m_query/resolver.py | 189 ++- .../ingestion/source/powerbi/powerbi.py | 102 +- .../integration/powerbi/golden_test_cll.json | 1357 +++++++++++++++++ .../integration/powerbi/test_m_parser.py | 155 +- .../tests/integration/powerbi/test_powerbi.py | 95 +- 9 files changed, 1804 insertions(+), 140 deletions(-) create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_cll.json diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index 0323e214045ae..fcfae6cd1e6d7 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -40,7 +40,7 @@ PowerBI Source supports M-Query expression for below listed PowerBI Data Sources 4. Microsoft SQL Server 5. Google BigQuery -Native SQL query parsing is supported for `Snowflake` and `Amazon Redshift` data-sources and only first table from `FROM` clause will be ingested as upstream table. Advance SQL construct like JOIN and SUB-QUERIES in `FROM` clause are not supported. +Native SQL query parsing is supported for `Snowflake` and `Amazon Redshift` data-sources. For example refer below native SQL query. The table `OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_UNIT_TARGET` will be ingested as upstream table. 
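The patch introduced above adds column-level lineage extraction for M-Query. As the config change that follows shows, the new `extract_column_level_lineage` option only passes validation when `native_query_parsing`, `enable_advance_lineage_sql_construct`, and `extract_lineage` are also enabled. A minimal programmatic recipe sketch with all four flags set — the tenant, credentials, and sink address are placeholder assumptions:

```python
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "powerbi",
            "config": {
                # Placeholder Azure AD app credentials.
                "tenant_id": "<tenant-id>",
                "client_id": "<client-id>",
                "client_secret": "<client-secret>",
                # Column-level lineage requires all four of these flags.
                "extract_lineage": True,
                "native_query_parsing": True,
                "enable_advance_lineage_sql_construct": True,
                "extract_column_level_lineage": True,
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```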
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index ffa685fb25826..a8c7e48f3785c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -397,6 +397,42 @@ class PowerBiDashboardSourceConfig( "as this option generates the upstream datasets URN in lowercase.", ) + # Enable CLL extraction + extract_column_level_lineage: bool = pydantic.Field( + default=False, + description="Whether to extract column level lineage. " + "Works only if configs `native_query_parsing`, `enable_advance_lineage_sql_construct` & `extract_lineage` are enabled. " + "Works for M-Query where native SQL is used for transformation.", + ) + + @root_validator + @classmethod + def validate_extract_column_level_lineage(cls, values: Dict) -> Dict: + flags = [ + "native_query_parsing", + "enable_advance_lineage_sql_construct", + "extract_lineage", + ] + + if ( + "extract_column_level_lineage" in values + and values["extract_column_level_lineage"] is False + ): + # Flag is not set. skip validation + return values + + logger.debug(f"Validating additional flags: {flags}") + + is_flag_enabled: bool = True + for flag in flags: + if flag not in values or values[flag] is False: + is_flag_enabled = False + + if not is_flag_enabled: + raise ValueError(f"Enable all these flags in recipe: {flags} ") + + return values + @validator("dataset_type_mapping") @classmethod def map_data_platform(cls, value): diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index 021c429c3c633..0afa8e7ff4564 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -9,7 +9,7 @@ SPECIAL_CHARACTERS = ["#(lf)", "(lf)"] -logger = logging.getLogger() +logger = logging.getLogger(__name__) def remove_special_characters(native_query: str) -> str: @@ -21,7 +21,7 @@ def remove_special_characters(native_query: str) -> str: def get_tables(native_query: str) -> List[str]: native_query = remove_special_characters(native_query) - logger.debug(f"Processing query = {native_query}") + logger.debug(f"Processing native query = {native_query}") tables: List[str] = [] parsed = sqlparse.parse(native_query)[0] tokens: List[sqlparse.sql.Token] = list(parsed.tokens) @@ -65,7 +65,7 @@ def parse_custom_sql( sql_query = remove_special_characters(query) - logger.debug(f"Parsing sql={sql_query}") + logger.debug(f"Processing native query = {sql_query}") return sqlglot_l.create_lineage_sql_parsed_result( query=sql_query, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 8cc38c366c42a..9134932c39fe0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -56,7 +56,7 @@ def get_upstream_tables( ctx: PipelineContext, config: PowerBiDashboardSourceConfig, parameters: Dict[str, str] = {}, -) -> List[resolver.DataPlatformTable]: +) -> List[resolver.Lineage]: if table.expression is None: logger.debug(f"Expression is none for table {table.full_name}") return [] diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 479f1decff903..e200ff41f71c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -27,7 +27,7 @@ IdentifierAccessor, ) from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table -from datahub.utilities.sqlglot_lineage import SqlParsingResult +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult logger = logging.getLogger(__name__) @@ -38,6 +38,16 @@ class DataPlatformTable: urn: str +@dataclass +class Lineage: + upstreams: List[DataPlatformTable] + column_lineage: List[ColumnLineageInfo] + + @staticmethod + def empty() -> "Lineage": + return Lineage(upstreams=[], column_lineage=[]) + + def urn_to_lowercase(value: str, flag: bool) -> str: if flag is True: return value.lower() @@ -120,9 +130,9 @@ def __init__( self.platform_instance_resolver = platform_instance_resolver @abstractmethod - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: pass @abstractmethod @@ -147,7 +157,7 @@ def get_db_detail_from_argument( def parse_custom_sql( self, query: str, server: str, database: Optional[str], schema: Optional[str] - ) -> List[DataPlatformTable]: + ) -> Lineage: dataplatform_tables: List[DataPlatformTable] = [] @@ -174,7 +184,7 @@ def parse_custom_sql( if parsed_result is None: logger.debug("Failed to parse query") - return dataplatform_tables + return Lineage.empty() for urn in parsed_result.in_tables: dataplatform_tables.append( @@ -184,9 +194,15 @@ def parse_custom_sql( ) ) + logger.debug(f"Native Query parsed result={parsed_result}") logger.debug(f"Generated dataplatform_tables={dataplatform_tables}") - return dataplatform_tables + return Lineage( + upstreams=dataplatform_tables, + column_lineage=parsed_result.column_lineage + if parsed_result.column_lineage is not None + else [], + ) class AbstractDataAccessMQueryResolver(ABC): @@ -215,7 +231,7 @@ def resolve_to_data_platform_table_list( ctx: PipelineContext, config: PowerBiDashboardSourceConfig, platform_instance_resolver: AbstractDataPlatformInstanceResolver, - ) -> List[DataPlatformTable]: + ) -> List[Lineage]: pass @@ -471,8 +487,8 @@ def resolve_to_data_platform_table_list( ctx: PipelineContext, config: PowerBiDashboardSourceConfig, platform_instance_resolver: AbstractDataPlatformInstanceResolver, - ) -> List[DataPlatformTable]: - data_platform_tables: List[DataPlatformTable] = [] + ) -> List[Lineage]: + lineage: List[Lineage] = [] # Find out output variable as we are doing backtracking in M-Query output_variable: Optional[str] = tree_function.get_output_variable( @@ -484,7 +500,7 @@ def resolve_to_data_platform_table_list( f"{self.table.full_name}-output-variable", "output-variable not found in table expression", ) - return data_platform_tables + return lineage # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail table_links: List[ @@ -509,7 +525,7 @@ def resolve_to_data_platform_table_list( # From supported_resolver enum get respective resolver like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it # & also pass additional information that will be need to generate urn - table_full_name_creator: 
AbstractDataPlatformTableCreator = ( + table_qualified_name_creator: AbstractDataPlatformTableCreator = ( supported_resolver.get_table_full_name_creator()( ctx=ctx, config=config, @@ -517,11 +533,9 @@ def resolve_to_data_platform_table_list( ) ) - data_platform_tables.extend( - table_full_name_creator.create_dataplatform_tables(f_detail) - ) + lineage.append(table_qualified_name_creator.create_lineage(f_detail)) - return data_platform_tables + return lineage class DefaultTwoStepDataAccessSources(AbstractDataPlatformTableCreator, ABC): @@ -536,7 +550,7 @@ class DefaultTwoStepDataAccessSources(AbstractDataPlatformTableCreator, ABC): def two_level_access_pattern( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: logger.debug( f"Processing {self.get_platform_pair().powerbi_data_platform_name} data-access function detail {data_access_func_detail}" ) @@ -545,7 +559,7 @@ def two_level_access_pattern( data_access_func_detail.arg_list ) if server is None or db_name is None: - return [] # Return empty list + return Lineage.empty() # Return empty list schema_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor @@ -568,19 +582,21 @@ def two_level_access_pattern( server=server, qualified_table_name=qualified_table_name, ) - - return [ - DataPlatformTable( - data_platform_pair=self.get_platform_pair(), - urn=urn, - ) - ] + return Lineage( + upstreams=[ + DataPlatformTable( + data_platform_pair=self.get_platform_pair(), + urn=urn, + ) + ], + column_lineage=[], + ) class PostgresDataPlatformTableCreator(DefaultTwoStepDataAccessSources): - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: return self.two_level_access_pattern(data_access_func_detail) def get_platform_pair(self) -> DataPlatformPair: @@ -630,10 +646,10 @@ def create_urn_using_old_parser( return dataplatform_tables - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: - dataplatform_tables: List[DataPlatformTable] = [] + ) -> Lineage: + arguments: List[str] = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( tree_function.token_values(data_access_func_detail.arg_list) @@ -647,14 +663,17 @@ def create_dataplatform_tables( if len(arguments) >= 4 and arguments[2] != "Query": logger.debug("Unsupported case is found. 
Second index is not the Query") - return dataplatform_tables + return Lineage.empty() if self.config.enable_advance_lineage_sql_construct is False: # Use previous parser to generate URN to keep backward compatibility - return self.create_urn_using_old_parser( - query=arguments[3], - db_name=arguments[1], - server=arguments[0], + return Lineage( + upstreams=self.create_urn_using_old_parser( + query=arguments[3], + db_name=arguments[1], + server=arguments[0], + ), + column_lineage=[], ) return self.parse_custom_sql( @@ -684,9 +703,9 @@ def _get_server_and_db_name(value: str) -> Tuple[Optional[str], Optional[str]]: return tree_function.strip_char_from_list([splitter_result[0]])[0], db_name - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: logger.debug( f"Processing Oracle data-access function detail {data_access_func_detail}" ) @@ -698,7 +717,7 @@ def create_dataplatform_tables( server, db_name = self._get_server_and_db_name(arguments[0]) if db_name is None or server is None: - return [] + return Lineage.empty() schema_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor @@ -719,18 +738,21 @@ def create_dataplatform_tables( qualified_table_name=qualified_table_name, ) - return [ - DataPlatformTable( - data_platform_pair=self.get_platform_pair(), - urn=urn, - ) - ] + return Lineage( + upstreams=[ + DataPlatformTable( + data_platform_pair=self.get_platform_pair(), + urn=urn, + ) + ], + column_lineage=[], + ) class DatabrickDataPlatformTableCreator(AbstractDataPlatformTableCreator): - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: logger.debug( f"Processing Databrick data-access function detail {data_access_func_detail}" ) @@ -749,7 +771,7 @@ def create_dataplatform_tables( logger.debug( "expecting instance to be IdentifierAccessor, please check if parsing is done properly" ) - return [] + return Lineage.empty() db_name: str = value_dict["Database"] schema_name: str = value_dict["Schema"] @@ -762,7 +784,7 @@ def create_dataplatform_tables( logger.info( f"server information is not available for {qualified_table_name}. 
Skipping upstream table" ) - return [] + return Lineage.empty() urn = urn_creator( config=self.config, @@ -772,12 +794,15 @@ def create_dataplatform_tables( qualified_table_name=qualified_table_name, ) - return [ - DataPlatformTable( - data_platform_pair=self.get_platform_pair(), - urn=urn, - ) - ] + return Lineage( + upstreams=[ + DataPlatformTable( + data_platform_pair=self.get_platform_pair(), + urn=urn, + ) + ], + column_lineage=[], + ) def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.DATABRICK_SQL.value @@ -789,9 +814,9 @@ def get_datasource_server( ) -> str: return tree_function.strip_char_from_list([arguments[0]])[0] - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: logger.debug( f"Processing {self.get_platform_pair().datahub_data_platform_name} function detail {data_access_func_detail}" ) @@ -826,12 +851,15 @@ def create_dataplatform_tables( qualified_table_name=qualified_table_name, ) - return [ - DataPlatformTable( - data_platform_pair=self.get_platform_pair(), - urn=urn, - ) - ] + return Lineage( + upstreams=[ + DataPlatformTable( + data_platform_pair=self.get_platform_pair(), + urn=urn, + ) + ], + column_lineage=[], + ) class SnowflakeDataPlatformTableCreator(DefaultThreeStepDataAccessSources): @@ -859,9 +887,9 @@ class AmazonRedshiftDataPlatformTableCreator(AbstractDataPlatformTableCreator): def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.AMAZON_REDSHIFT.value - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: + ) -> Lineage: logger.debug( f"Processing AmazonRedshift data-access function detail {data_access_func_detail}" ) @@ -870,7 +898,7 @@ def create_dataplatform_tables( data_access_func_detail.arg_list ) if db_name is None or server is None: - return [] # Return empty list + return Lineage.empty() # Return empty list schema_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor @@ -891,12 +919,15 @@ def create_dataplatform_tables( qualified_table_name=qualified_table_name, ) - return [ - DataPlatformTable( - data_platform_pair=self.get_platform_pair(), - urn=urn, - ) - ] + return Lineage( + upstreams=[ + DataPlatformTable( + data_platform_pair=self.get_platform_pair(), + urn=urn, + ) + ], + column_lineage=[], + ) class NativeQueryDataPlatformTableCreator(AbstractDataPlatformTableCreator): @@ -916,9 +947,7 @@ def is_native_parsing_supported(data_access_function_name: str) -> bool: in NativeQueryDataPlatformTableCreator.SUPPORTED_NATIVE_QUERY_DATA_PLATFORM ) - def create_urn_using_old_parser( - self, query: str, server: str - ) -> List[DataPlatformTable]: + def create_urn_using_old_parser(self, query: str, server: str) -> Lineage: dataplatform_tables: List[DataPlatformTable] = [] tables: List[str] = native_sql_parser.get_tables(query) @@ -947,12 +976,14 @@ def create_urn_using_old_parser( logger.debug(f"Generated dataplatform_tables {dataplatform_tables}") - return dataplatform_tables + return Lineage( + upstreams=dataplatform_tables, + column_lineage=[], + ) - def create_dataplatform_tables( + def create_lineage( self, data_access_func_detail: DataAccessFunctionDetail - ) -> List[DataPlatformTable]: - dataplatform_tables: List[DataPlatformTable] = [] + ) -> Lineage: t1: Tree = cast( Tree, tree_function.first_arg_list_func(data_access_func_detail.arg_list) ) @@ -963,7 +994,7 @@ def 
create_dataplatform_tables( f"Expecting 2 argument, actual argument count is {len(flat_argument_list)}" ) logger.debug(f"Flat argument list = {flat_argument_list}") - return dataplatform_tables + return Lineage.empty() data_access_tokens: List[str] = tree_function.remove_whitespaces_from_list( tree_function.token_values(flat_argument_list[0]) ) @@ -981,7 +1012,7 @@ def create_dataplatform_tables( f"Server is not available in argument list for data-platform {data_access_tokens[0]}. Returning empty " "list" ) - return dataplatform_tables + return Lineage.empty() self.current_data_platform = self.SUPPORTED_NATIVE_QUERY_DATA_PLATFORM[ data_access_tokens[0] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 5d477ee090e7e..52bcef66658c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -44,6 +44,11 @@ StatefulIngestionSourceBase, ) from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, +) from datahub.metadata.schema_classes import ( BrowsePathsClass, ChangeTypeClass, @@ -71,6 +76,7 @@ ViewPropertiesClass, ) from datahub.utilities.dedup_list import deduplicate_list +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo # Logger instance logger = logging.getLogger(__name__) @@ -165,6 +171,48 @@ def extract_dataset_schema( ) return [schema_mcp] + def make_fine_grained_lineage_class( + self, lineage: resolver.Lineage, dataset_urn: str + ) -> List[FineGrainedLineage]: + fine_grained_lineages: List[FineGrainedLineage] = [] + + if ( + self.__config.extract_column_level_lineage is False + or self.__config.extract_lineage is False + ): + return fine_grained_lineages + + if lineage is None: + return fine_grained_lineages + + logger.info("Extracting column level lineage") + + cll: List[ColumnLineageInfo] = lineage.column_lineage + + for cll_info in cll: + downstream = ( + [builder.make_schema_field_urn(dataset_urn, cll_info.downstream.column)] + if cll_info.downstream is not None + and cll_info.downstream.column is not None + else [] + ) + + upstreams = [ + builder.make_schema_field_urn(column_ref.table, column_ref.column) + for column_ref in cll_info.upstreams + ] + + fine_grained_lineages.append( + FineGrainedLineage( + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=downstream, + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=upstreams, + ) + ) + + return fine_grained_lineages + def extract_lineage( self, table: powerbi_data_classes.Table, ds_urn: str ) -> List[MetadataChangeProposalWrapper]: @@ -174,8 +222,9 @@ def extract_lineage( parameters = table.dataset.parameters if table.dataset else {} upstream: List[UpstreamClass] = [] + cll_lineage: List[FineGrainedLineage] = [] - upstream_dpts: List[resolver.DataPlatformTable] = parser.get_upstream_tables( + upstream_lineage: List[resolver.Lineage] = parser.get_upstream_tables( table=table, reporter=self.__reporter, platform_instance_resolver=self.__dataplatform_instance_resolver, @@ -185,34 +234,49 @@ def extract_lineage( ) logger.debug( - f"PowerBI virtual table {table.full_name} and it's upstream dataplatform tables = {upstream_dpts}" + f"PowerBI virtual table {table.full_name} and it's upstream dataplatform tables = 
{upstream_lineage}" ) - for upstream_dpt in upstream_dpts: - if ( - upstream_dpt.data_platform_pair.powerbi_data_platform_name - not in self.__config.dataset_type_mapping.keys() - ): - logger.debug( - f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping", + for lineage in upstream_lineage: + for upstream_dpt in lineage.upstreams: + if ( + upstream_dpt.data_platform_pair.powerbi_data_platform_name + not in self.__config.dataset_type_mapping.keys() + ): + logger.debug( + f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping", + ) + continue + + upstream_table_class = UpstreamClass( + upstream_dpt.urn, + DatasetLineageTypeClass.TRANSFORMED, ) - continue - upstream_table_class = UpstreamClass( - upstream_dpt.urn, - DatasetLineageTypeClass.TRANSFORMED, - ) + upstream.append(upstream_table_class) - upstream.append(upstream_table_class) + # Add column level lineage if any + cll_lineage.extend( + self.make_fine_grained_lineage_class( + lineage=lineage, + dataset_urn=ds_urn, + ) + ) if len(upstream) > 0: - upstream_lineage = UpstreamLineageClass(upstreams=upstream) + + upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass( + upstreams=upstream, + fineGrainedLineages=cll_lineage or None, + ) + logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}") + mcp = MetadataChangeProposalWrapper( entityType=Constant.DATASET, changeType=ChangeTypeClass.UPSERT, entityUrn=ds_urn, - aspect=upstream_lineage, + aspect=upstream_lineage_class, ) mcps.append(mcp) @@ -1075,6 +1139,10 @@ def report_to_datahub_work_units( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", ) +@capability( + SourceCapability.LINEAGE_FINE, + "Disabled by default, configured using `extract_column_level_lineage`. 
", +) class PowerBiDashboardSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json new file mode 100644 index 0000000000000..5f92cdcfb5bde --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_cll.json @@ -0,0 +1,1357 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "public issue_history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "SNOWFLAKE_TESTTABLE", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,PBI_TEST.TEST.TESTTABLE,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query", + 
"description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),monthid)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),seller)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV),agent_key)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),client_director)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,operations_analytics.transformed_prod.v_aps_sme_units_v4,PROD),monthid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV),cd_agent_key)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = GoogleBigQuery.Database([BillingProject = #\"Parameter - Source\"]),\n#\"gcp-project\" = Source{[Name=#\"Parameter - Source\"]}[Data],\nuniversal_Schema = #\"gcp-project\"{[Name=\"universal\",Kind=\"Schema\"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name=\"D_WH_DATE\",Kind=\"Table\"]}[Data],\n#\"Filtered Rows\" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#\"Filtered Rows1\" = Table.SelectRows(#\"Filtered Rows\", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#\"Filtered Rows1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-test-project.universal.D_WH_DATE,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Oracle.Database(\"localhost:1521/salesdb.GSLAB.COM\", [HierarchicalNavigation=true]), HR = Source{[Schema=\"HR\"]}[Data], EMPLOYEES1 = HR{[Name=\"EMPLOYEES\"]}[Data] \n in EMPLOYEES1", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_forecast,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,gsl_test_db.public.sales_analyst,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV),name)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "job-history", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = PostgreSQL.Database(\"localhost\" , \"mics\" ),\n public_order_date = Source{[Schema=\"public\",Item=\"order_date\"]}[Data] \n in \n public_order_date", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details", + "name": "postgres_test_table", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n 
Source = Sql.Database(\"localhost\", \"library\"),\n dbo_book_issue = Source{[Schema=\"dbo\",Item=\"book_issue\"]}[Data]\n in dbo_book_issue", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "dbo_book_issue", + "description": "hr pbi test description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details", + "name": "ms_sql_native_table", + "description": "hr pbi test description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,library.dbo.book_issue,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User1@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "User2@foo.com" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details" + }, + "title": "test_tile", + "description": "test_tile", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "powerbi.linkedin.com/charts/B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "createdFrom": "Dataset", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details" + }, + "title": "yearly_sales", + "description": "yearly_sales", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "chartId": "powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi/demo-workspace" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "chartCount": "2", + "workspaceName": "demo-workspace", + "workspaceId": "64ED5CAD-7C10-4684-8180-826122881108" + }, + "title": "test_dashboard", + "description": "Description of test dashboard", + "charts": [ + "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "https://localhost/dashboards/web/1" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:users.User1@foo.com", + "type": "NONE" + }, + { + "owner": "urn:li:corpuser:users.User2@foo.com", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "demo-workspace" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "dummy", + "viewLanguage": "m_query" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "PowerBI Dataset Table", + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,employee-dataset.employee_ctc,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "datasetId": "91580e0e-1680-4b1c-bbf9-4f6764d7a5ff" + }, + "externalUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/91580e0e-1680-4b1c-bbf9-4f6764d7a5ff/details", + "name": "employee_ctc", + "description": "Employee Management", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 2fcbf5a0c0860..2e9c02ef759a5 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -15,8 +15,9 @@ AbstractDataPlatformInstanceResolver, create_dataplatform_instance_resolver, ) -from datahub.ingestion.source.powerbi.m_query import parser, tree_function -from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable +from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function +from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef pytestmark = pytest.mark.slow @@ -70,6 +71,15 @@ def get_default_instances( return PipelineContext(run_id="fake"), config, platform_instance_resolver +def combine_upstreams_from_lineage(lineage: List[Lineage]) -> List[DataPlatformTable]: + data_platforms: List[DataPlatformTable] = [] + + for item in lineage: + data_platforms.extend(item.upstreams) + + return data_platforms + + @pytest.mark.integration def test_parse_m_query1(): expression: str = M_QUERIES[0] @@ -182,7 +192,7 @@ def test_snowflake_regular_case(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -212,7 +222,7 @@ def test_postgres_regular_case(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -242,7 +252,7 @@ def test_databricks_regular_case(): ctx=ctx, config=config, 
platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -272,7 +282,7 @@ def test_oracle_regular_case(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -302,7 +312,7 @@ def test_mssql_regular_case(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -348,7 +358,7 @@ def test_mssql_with_query(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert data_platform_tables[0].urn == expected_tables[index] @@ -388,7 +398,7 @@ def test_snowflake_native_query(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert data_platform_tables[0].urn == expected_tables[index] @@ -410,7 +420,7 @@ def test_google_bigquery_1(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -442,7 +452,7 @@ def test_google_bigquery_2(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -472,7 +482,7 @@ def test_for_each_expression_1(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -501,7 +511,7 @@ def test_for_each_expression_2(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -523,15 +533,15 @@ def test_native_query_disabled(): reporter = PowerBiDashboardSourceReport() ctx, config, platform_instance_resolver = get_default_instances() - config.native_query_parsing = False - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + config.native_query_parsing = False # Disable native query parsing + lineage: List[Lineage] = parser.get_upstream_tables( table, reporter, ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, ) - assert len(data_platform_tables) == 0 + assert len(lineage) == 0 @pytest.mark.integration @@ -548,12 +558,14 @@ def test_multi_source_table(): ctx, config, platform_instance_resolver = get_default_instances() - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, - reporter, - ctx=ctx, - config=config, - platform_instance_resolver=platform_instance_resolver, + data_platform_tables: List[DataPlatformTable] = combine_upstreams_from_lineage( + parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) ) assert len(data_platform_tables) == 2 @@ -581,12 +593,14 @@ def test_table_combine(): ctx, config, platform_instance_resolver = get_default_instances() - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, - reporter, - ctx=ctx, - config=config, - platform_instance_resolver=platform_instance_resolver, + data_platform_tables: List[DataPlatformTable] = combine_upstreams_from_lineage( + parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) ) assert len(data_platform_tables) == 2 @@ -624,7 +638,7 
@@ def test_expression_is_none(): ctx, config, platform_instance_resolver = get_default_instances() - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + lineage: List[Lineage] = parser.get_upstream_tables( table, reporter, ctx=ctx, @@ -632,7 +646,7 @@ def test_expression_is_none(): platform_instance_resolver=platform_instance_resolver, ) - assert len(data_platform_tables) == 0 + assert len(lineage) == 0 def test_redshift_regular_case(): @@ -651,7 +665,7 @@ def test_redshift_regular_case(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -678,7 +692,7 @@ def test_redshift_native_query(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, - ) + )[0].upstreams assert len(data_platform_tables) == 1 assert ( @@ -708,7 +722,7 @@ def test_sqlglot_parser(): } ) - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + lineage: List[resolver.Lineage] = parser.get_upstream_tables( table, reporter, ctx=ctx, @@ -716,6 +730,8 @@ def test_sqlglot_parser(): platform_instance_resolver=platform_instance_resolver, ) + data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams + assert len(data_platform_tables) == 2 assert ( data_platform_tables[0].urn @@ -725,3 +741,76 @@ def test_sqlglot_parser(): data_platform_tables[1].urn == "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)" ) + + assert lineage[0].column_lineage == [ + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="client_director"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="tier"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column='upper("manager")'), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="team_type"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="date_target"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="monthid"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="target_team"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="seller_email"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="agent_key"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="sme_quota"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="revenue_quota"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="service_quota"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="bl_target"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="software_quota"), + upstreams=[], + logic=None, + ), + ] diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 044532021a19c..b0695e3ea9954 100644 --- 
a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1,4 +1,5 @@ import logging +import re import sys from typing import Any, Dict, List, cast from unittest import mock @@ -1127,7 +1128,7 @@ def test_dataset_type_mapping_error( """ register_mock_api(request_mock=requests_mock) - try: + with pytest.raises(Exception, match=r"dataset_type_mapping is deprecated"): Pipeline.create( { "run_id": "powerbi-test", @@ -1150,11 +1151,6 @@ def test_dataset_type_mapping_error( }, } ) - except Exception as e: - assert ( - "dataset_type_mapping is deprecated. Use server_to_platform_instance only." - in str(e) - ) @freeze_time(FROZEN_TIME) @@ -1506,3 +1502,90 @@ def test_independent_datasets_extraction( output_path=tmp_path / "powerbi_independent_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +def test_cll_extraction(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): + + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + register_mock_api( + request_mock=requests_mock, + ) + + default_conf: dict = default_source_config() + + del default_conf[ + "dataset_type_mapping" + ] # delete this key so that connector set it to default (all dataplatform) + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_conf, + "extract_lineage": True, + "extract_column_level_lineage": True, + "enable_advance_lineage_sql_construct": True, + "native_query_parsing": True, + "extract_independent_datasets": True, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_cll_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_cll.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_cll_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +def test_cll_extraction_flags( + mock_msal, pytestconfig, tmp_path, mock_time, requests_mock +): + + register_mock_api( + request_mock=requests_mock, + ) + + default_conf: dict = default_source_config() + pattern: str = re.escape( + "Enable all these flags in recipe: ['native_query_parsing', 'enable_advance_lineage_sql_construct', 'extract_lineage']" + ) + + with pytest.raises(Exception, match=pattern): + + Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_conf, + "extract_column_level_lineage": True, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_cll_mces.json", + }, + }, + } + ) From a300b39f15cd689b42b7c32ce9e5087ccf5a356e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Oct 2023 06:53:15 -0400 Subject: [PATCH 082/156] feat(ingest/airflow): airflow plugin v2 (#8853) --- .github/workflows/airflow-plugin.yml | 25 +- .github/workflows/build-and-test.yml | 9 +- docker/airflow/local_airflow.md | 2 +- docs/how/updating-datahub.md | 3 + docs/lineage/airflow.md | 251 ++- .../airflow-plugin/build.gradle | 23 +- .../airflow-plugin/pyproject.toml | 1 + .../airflow-plugin/setup.cfg | 30 +- .../airflow-plugin/setup.py | 71 +- .../datahub_airflow_plugin/_airflow_shims.py | 32 + .../src/datahub_airflow_plugin/_config.py | 80 + 
.../_datahub_listener_module.py | 7 + .../_datahub_ol_adapter.py | 23 + .../src/datahub_airflow_plugin/_extractors.py | 244 ++ .../client/airflow_generator.py | 69 +- .../datahub_listener.py | 494 +++++ .../datahub_airflow_plugin/datahub_plugin.py | 391 +--- .../datahub_plugin_v22.py | 336 +++ .../example_dags/lineage_emission_dag.py | 22 +- .../datahub_airflow_plugin/hooks/datahub.py | 115 +- .../{ => lineage}/_lineage_core.py | 30 +- .../datahub_airflow_plugin/lineage/datahub.py | 28 +- .../operators/datahub.py | 4 +- .../airflow-plugin/tests/conftest.py | 6 + .../tests/integration/dags/basic_iolets.py | 34 + .../tests/integration/dags/simple_dag.py | 34 + .../integration/dags/snowflake_operator.py | 32 + .../tests/integration/dags/sqlite_operator.py | 75 + .../integration/goldens/v1_basic_iolets.json | 533 +++++ .../integration/goldens/v1_simple_dag.json | 718 ++++++ .../integration/goldens/v2_basic_iolets.json | 535 +++++ .../v2_basic_iolets_no_dag_listener.json | 535 +++++ .../integration/goldens/v2_simple_dag.json | 666 ++++++ .../v2_simple_dag_no_dag_listener.json | 722 ++++++ .../goldens/v2_snowflake_operator.json | 507 +++++ .../goldens/v2_sqlite_operator.json | 1735 +++++++++++++++ .../v2_sqlite_operator_no_dag_listener.json | 1955 +++++++++++++++++ .../integration/integration_test_dummy.py | 2 - .../tests/integration/test_plugin.py | 392 ++++ .../airflow-plugin/tests/unit/test_airflow.py | 25 +- .../airflow-plugin/tests/unit/test_dummy.py | 2 - .../tests/unit/test_packaging.py | 8 + .../airflow-plugin/tox.ini | 39 +- metadata-ingestion/setup.py | 20 +- .../api/entities/corpgroup/corpgroup.py | 33 +- .../datahub/api/entities/corpuser/corpuser.py | 9 +- .../datahub/api/entities/datajob/dataflow.py | 19 +- .../datahub/api/entities/datajob/datajob.py | 39 +- .../dataprocess/dataprocess_instance.py | 21 +- .../api/entities/dataproduct/dataproduct.py | 22 +- .../src/datahub/emitter/generic_emitter.py | 31 + .../src/datahub/emitter/kafka_emitter.py | 3 +- .../src/datahub/emitter/rest_emitter.py | 16 +- .../emitter/synchronized_file_emitter.py | 60 + .../src/datahub/ingestion/graph/client.py | 17 + .../datahub/ingestion/source/kafka_connect.py | 4 +- .../ingestion/source/sql/sql_common.py | 48 - .../source/sql/sqlalchemy_uri_mapper.py | 47 + .../src/datahub/ingestion/source/superset.py | 6 +- .../src/datahub/ingestion/source/tableau.py | 11 +- .../integrations/great_expectations/action.py | 4 +- .../datahub/testing/compare_metadata_json.py | 22 +- .../src/datahub/utilities/sqlglot_lineage.py | 40 +- .../goldens/test_create_table_ddl.json | 8 + .../unit/sql_parsing/test_sqlglot_lineage.py | 15 + .../tests/unit/test_sql_common.py | 7 +- 66 files changed, 10457 insertions(+), 890 deletions(-) create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py rename metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/{ => lineage}/_lineage_core.py (72%) create mode 100644 
metadata-ingestion-modules/airflow-plugin/tests/conftest.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json delete mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py delete mode 100644 metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py create mode 100644 metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py create mode 100644 metadata-ingestion/src/datahub/emitter/generic_emitter.py create mode 100644 metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 63bab821cc398..a250bddcc16d1 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -32,16 +32,21 @@ jobs: strategy: matrix: include: - - python-version: "3.7" - extraPythonRequirement: "apache-airflow~=2.1.0" - - python-version: "3.7" - extraPythonRequirement: "apache-airflow~=2.2.0" + - python-version: "3.8" + extra_pip_requirements: "apache-airflow~=2.1.4" + extra_pip_extras: plugin-v1 + - python-version: "3.8" + extra_pip_requirements: "apache-airflow~=2.2.4" + extra_pip_extras: plugin-v1 - python-version: "3.10" - extraPythonRequirement: "apache-airflow~=2.4.0" + extra_pip_requirements: "apache-airflow~=2.4.0" + extra_pip_extras: plugin-v2 - python-version: "3.10" - extraPythonRequirement: "apache-airflow~=2.6.0" + extra_pip_requirements: "apache-airflow~=2.6.0" + extra_pip_extras: plugin-v2 - python-version: "3.10" - extraPythonRequirement: "apache-airflow>2.6.0" + extra_pip_requirements: "apache-airflow>=2.7.0" + extra_pip_extras: plugin-v2 fail-fast: false steps: - uses: actions/checkout@v3 @@ -51,13 +56,13 @@ jobs: cache: "pip" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh - - name: Install airflow package and test (extras ${{ 
matrix.extraPythonRequirement }}) - run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick + - name: Install airflow package and test (extras ${{ matrix.extra_pip_requirements }}) + run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze - uses: actions/upload-artifact@v3 - if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'apache-airflow>2.6.0' }} + if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }} with: name: Test Results (Airflow Plugin ${{ matrix.python-version}}) path: | diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f6320e1bd5c9f..3f409878b191f 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -26,9 +26,9 @@ jobs: matrix: command: [ - "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel", + # metadata-ingestion and airflow-plugin each have dedicated build jobs + "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel", "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel", - "./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel" ] timezone: [ @@ -51,7 +51,8 @@ jobs: java-version: 11 - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.10" + cache: pip - name: Gradle build (and test) run: | ${{ matrix.command }} @@ -81,7 +82,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.10" - name: Download YQ uses: chrisdickinson/setup-yq@v1.0.1 with: diff --git a/docker/airflow/local_airflow.md b/docker/airflow/local_airflow.md index 55a64f5c122c5..fbfc1d17327c5 100644 --- a/docker/airflow/local_airflow.md +++ b/docker/airflow/local_airflow.md @@ -1,6 +1,6 @@ :::caution -This feature is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. If you'd like to use it, please reach out to us on the [community slack.](docs/slack.md) +This guide is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. 
If you'd like to use it, please reach out to us on the [community slack.](docs/slack.md) ::: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 9b19291ee246a..4df8d435cf1c4 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -5,7 +5,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next ### Breaking Changes + - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. +- #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. ### Potential Downtime diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 49de5352f6d58..19ed1598d4c5a 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -1,74 +1,137 @@ # Airflow Integration -DataHub supports integration of +:::note -- Airflow Pipeline (DAG) metadata -- DAG and Task run information as well as -- Lineage information when present +If you're looking to schedule DataHub ingestion using Airflow, see the guide on [scheduling ingestion with Airflow](../../metadata-ingestion/schedule_docs/airflow.md). -You can use either the DataHub Airflow lineage plugin (recommended) or the Airflow lineage backend (deprecated). +::: -## Using Datahub's Airflow lineage plugin +The DataHub Airflow plugin supports: -:::note +- Automatic column-level lineage extraction from various operators e.g. `SqlOperator`s (including `MySqlOperator`, `PostgresOperator`, `SnowflakeOperator`, and more), `S3FileTransformOperator`, and a few others. +- Airflow DAG and tasks, including properties, ownership, and tags. +- Task run information, including task successes and failures. +- Manual lineage annotations using `inlets` and `outlets` on Airflow operators. -The Airflow lineage plugin is only supported with Airflow version >= 2.0.2 or on MWAA with an Airflow version >= 2.0.2. +There's two actively supported implementations of the plugin, with different Airflow version support. -If you're using Airflow 1.x, use the Airflow lineage plugin with acryl-datahub-airflow-plugin <= 0.9.1.0. +| Approach | Airflow Version | Notes | +| --------- | --------------- | --------------------------------------------------------------------------- | +| Plugin v2 | 2.3+ | Recommended. Requires Python 3.8+ | +| Plugin v1 | 2.1+ | No automatic lineage extraction; may not extract lineage if the task fails. | -::: +If you're using Airflow older than 2.1, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details. -This plugin registers a task success/failure callback on every task with a cluster policy and emits DataHub events from that. This allows this plugin to be able to register both task success as well as failures compared to the older Airflow Lineage Backend which could only support emitting task success. + + -### Setup +## DataHub Plugin v2 -1. You need to install the required dependency in your airflow. +### Installation + +The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, use the v1 plugin instead. 
```shell -pip install acryl-datahub-airflow-plugin +pip install 'acryl-datahub-airflow-plugin[plugin-v2]' ``` -:::note +### Configuration -The [DataHub Rest](../../metadata-ingestion/sink_docs/datahub.md#datahub-rest) emitter is included in the plugin package by default. To use [DataHub Kafka](../../metadata-ingestion/sink_docs/datahub.md#datahub-kafka) install `pip install acryl-datahub-airflow-plugin[datahub-kafka]`. +Set up a DataHub connection in Airflow. -::: +```shell +airflow connections add --conn-type 'datahub-rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '' +``` + +No additional configuration is required to use the plugin. However, there are some optional configuration parameters that can be set in the `airflow.cfg` file. + +```ini title="airflow.cfg" +[datahub] +# Optional - additional config here. +enabled = True # default +``` + +| Name | Default value | Description | +| -------------------------- | -------------------- | ---------------------------------------------------------------------------------------- | +| enabled | true | If the plugin should be enabled. | +| conn_id | datahub_rest_default | The name of the datahub rest connection. | +| cluster | prod | name of the airflow cluster | +| capture_ownership_info | true | Extract DAG ownership. | +| capture_tags_info | true | Extract DAG tags. | +| capture_executions | true | Extract task runs and success/failure statuses. This will show up in DataHub "Runs" tab. | +| enable_extractors | true | Enable automatic lineage extraction. | +| disable_openlineage_plugin | true | Disable the OpenLineage plugin to avoid duplicative processing. | +| log_level | _no change_ | [debug] Set the log level for the plugin. | +| debug_emitter | false | [debug] If true, the plugin will log the emitted events. | + +### Automatic lineage extraction + +To automatically extract lineage information, the v2 plugin builds on top of Airflow's built-in [OpenLineage extractors](https://openlineage.io/docs/integrations/airflow/default-extractors). -2. Disable lazy plugin loading in your airflow.cfg. - On MWAA you should add this config to your [Apache Airflow configuration options](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-env-variables.html#configuring-2.0-airflow-override). +The SQL-related extractors have been updated to use DataHub's SQL parser, which is more robust than the built-in one and uses DataHub's metadata information to generate column-level lineage. We discussed the DataHub SQL parser, including why schema-aware parsing works better and how it performs on benchmarks, during the [June 2023 community town hall](https://youtu.be/1QVcUmRQK5E?si=U27zygR7Gi_KdkzE&t=2309). + +## DataHub Plugin v1 + +### Installation + +The v1 plugin requires Airflow 2.1+ and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details. + +If you're using Airflow 2.3+, we recommend using the v2 plugin instead. If you need to use the v1 plugin with Airflow 2.3+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`. + +```shell +pip install 'acryl-datahub-airflow-plugin[plugin-v1]' + +# The DataHub rest connection type is included by default. +# To use the DataHub Kafka connection type, install the plugin with the kafka extras. 
+pip install 'acryl-datahub-airflow-plugin[plugin-v1,datahub-kafka]' +``` + + + +### Configuration + +#### Disable lazy plugin loading ```ini title="airflow.cfg" [core] lazy_load_plugins = False ``` -3. You must configure an Airflow hook for Datahub. We support both a Datahub REST hook and a Kafka-based hook, but you only need one. +On MWAA you should add this config to your [Apache Airflow configuration options](https://docs.aws.amazon.com/mwaa/latest/userguide/configuring-env-variables.html#configuring-2.0-airflow-override). + +#### Setup a DataHub connection - ```shell - # For REST-based: - airflow connections add --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '' - # For Kafka-based (standard Kafka sink config can be passed via extras): - airflow connections add --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}' - ``` +You must configure an Airflow connection for Datahub. We support both a Datahub REST and a Kafka-based connections, but you only need one. -4. Add your `datahub_conn_id` and/or `cluster` to your `airflow.cfg` file if it is not align with the default values. See configuration parameters below +```shell +# For REST-based: +airflow connections add --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '' +# For Kafka-based (standard Kafka sink config can be passed via extras): +airflow connections add --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}' +``` - **Configuration options:** +#### Configure the plugin - | Name | Default value | Description | - | ------------------------------ | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | - | datahub.enabled | true | If the plugin should be enabled. | - | datahub.conn_id | datahub_rest_default | The name of the datahub connection you set in step 1. | - | datahub.cluster | prod | name of the airflow cluster | - | datahub.capture_ownership_info | true | If true, the owners field of the DAG will be capture as a DataHub corpuser. | - | datahub.capture_tags_info | true | If true, the tags field of the DAG will be captured as DataHub tags. | - | datahub.capture_executions | true | If true, we'll capture task runs in DataHub in addition to DAG definitions. | - | datahub.graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. | +If your config doesn't align with the default values, you can configure the plugin in your `airflow.cfg` file. + +```ini title="airflow.cfg" +[datahub] +enabled = true +conn_id = datahub_rest_default # or datahub_kafka_default +# etc. +``` -5. Configure `inlets` and `outlets` for your Airflow operators. For reference, look at the sample DAG in [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py), or reference [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) if you're using the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html). -6. 
[optional] Learn more about [Airflow lineage](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html), including shorthand notation and some automation. +| Name | Default value | Description | +| ---------------------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| enabled | true | If the plugin should be enabled. | +| conn_id | datahub_rest_default | The name of the datahub connection you set in step 1. | +| cluster | prod | name of the airflow cluster | +| capture_ownership_info | true | If true, the owners field of the DAG will be capture as a DataHub corpuser. | +| capture_tags_info | true | If true, the tags field of the DAG will be captured as DataHub tags. | +| capture_executions | true | If true, we'll capture task runs in DataHub in addition to DAG definitions. | +| graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. | -### How to validate installation +#### Validate that the plugin is working 1. Go and check in Airflow at Admin -> Plugins menu if you can see the DataHub plugin 2. Run an Airflow DAG. In the task logs, you should see Datahub related log messages like: @@ -77,9 +140,22 @@ lazy_load_plugins = False Emitting DataHub ... ``` -### Emitting lineage via a custom operator to the Airflow Plugin +## Manual Lineage Annotation + +### Using `inlets` and `outlets` + +You can manually annotate lineage by setting `inlets` and `outlets` on your Airflow operators. This is useful if you're using an operator that doesn't support automatic lineage extraction, or if you want to override the automatic lineage extraction. + +We have a few code samples that demonstrate how to use `inlets` and `outlets`: -If you have created a custom Airflow operator [docs](https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html) that inherits from the BaseOperator class, +- [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py) +- [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) - uses the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html) + +For more information, take a look at the [Airflow lineage docs](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html). + +### Custom Operators + +If you have created a [custom Airflow operator](https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html) that inherits from the BaseOperator class, when overriding the `execute` function, set inlets and outlets via `context['ti'].task.inlets` and `context['ti'].task.outlets`. The DataHub Airflow plugin will then pick up those inlets and outlets after the task runs. 
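For illustration, here is a minimal self-contained sketch of that pattern (the operator name and the platform/table names are hypothetical; it assumes the `Dataset` helper from `datahub_airflow_plugin.entities`):

```python
from airflow.models.baseoperator import BaseOperator

from datahub_airflow_plugin.entities import Dataset


class MyLineageOperator(BaseOperator):
    """Hypothetical operator that reports its own lineage to the DataHub plugin."""

    def execute(self, context):
        # Determine lineage however this operator knows how to; the
        # platform and table names below are placeholders.
        inlets = [Dataset("snowflake", "mydb.schema.source_table")]
        outlets = [Dataset("snowflake", "mydb.schema.target_table")]

        # Attach the lineage to the task instance so the DataHub plugin
        # can pick it up once the task finishes.
        context["ti"].task.inlets = inlets
        context["ti"].task.outlets = outlets

        # ... do the actual work of the operator here ...
```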
@@ -90,7 +166,7 @@ class DbtOperator(BaseOperator): def execute(self, context): # do something inlets, outlets = self._get_lineage() - # inlets/outlets are lists of either datahub_provider.entities.Dataset or datahub_provider.entities.Urn + # inlets/outlets are lists of either datahub_airflow_plugin.entities.Dataset or datahub_airflow_plugin.entities.Urn context['ti'].task.inlets = self.inlets context['ti'].task.outlets = self.outlets @@ -100,78 +176,25 @@ class DbtOperator(BaseOperator): return inlets, outlets ``` -If you override the `pre_execute` and `post_execute` function, ensure they include the `@prepare_lineage` and `@apply_lineage` decorators respectively. [source](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage) - -## Using DataHub's Airflow lineage backend (deprecated) - -:::caution - -The DataHub Airflow plugin (above) is the recommended way to integrate Airflow with DataHub. For managed services like MWAA, the lineage backend is not supported and so you must use the Airflow plugin. - -If you're using Airflow 1.x, we recommend using the Airflow lineage backend with acryl-datahub <= 0.9.1.0. - -::: - -:::note - -If you are looking to run Airflow and DataHub using docker locally, follow the guide [here](../../docker/airflow/local_airflow.md). Otherwise proceed to follow the instructions below. -::: - -### Setting up Airflow to use DataHub as Lineage Backend - -1. You need to install the required dependency in your airflow. See - -```shell -pip install acryl-datahub[airflow] -# If you need the Kafka-based emitter/hook: -pip install acryl-datahub[airflow,datahub-kafka] -``` - -2. You must configure an Airflow hook for Datahub. We support both a Datahub REST hook and a Kafka-based hook, but you only need one. - - ```shell - # For REST-based: - airflow connections add --conn-type 'datahub_rest' 'datahub_rest_default' --conn-host 'http://datahub-gms:8080' --conn-password '' - # For Kafka-based (standard Kafka sink config can be passed via extras): - airflow connections add --conn-type 'datahub_kafka' 'datahub_kafka_default' --conn-host 'broker:9092' --conn-extra '{}' - ``` +If you override the `pre_execute` and `post_execute` function, ensure they include the `@prepare_lineage` and `@apply_lineage` decorators respectively. Reference the [Airflow docs](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage) for more details. -3. Add the following lines to your `airflow.cfg` file. +## Emit Lineage Directly - ```ini title="airflow.cfg" - [lineage] - backend = datahub_provider.lineage.datahub.DatahubLineageBackend - datahub_kwargs = { - "enabled": true, - "datahub_conn_id": "datahub_rest_default", - "cluster": "prod", - "capture_ownership_info": true, - "capture_tags_info": true, - "graceful_exceptions": true } - # The above indentation is important! - ``` +If you can't use the plugin or annotate inlets/outlets, you can also emit lineage using the `DatahubEmitterOperator`. - **Configuration options:** +Reference [`lineage_emission_dag.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py) for a full example. - - `datahub_conn_id` (required): Usually `datahub_rest_default` or `datahub_kafka_default`, depending on what you named the connection in step 1. - - `cluster` (defaults to "prod"): The "cluster" to associate Airflow DAGs and tasks with. 
- - `capture_ownership_info` (defaults to true): If true, the owners field of the DAG will be capture as a DataHub corpuser. - - `capture_tags_info` (defaults to true): If true, the tags field of the DAG will be captured as DataHub tags. - - `capture_executions` (defaults to false): If true, it captures task runs as DataHub DataProcessInstances. - - `graceful_exceptions` (defaults to true): If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. +In order to use this example, you must first configure the Datahub hook. Like in ingestion, we support a Datahub REST hook and a Kafka-based hook. See the plugin configuration for examples. -4. Configure `inlets` and `outlets` for your Airflow operators. For reference, look at the sample DAG in [`lineage_backend_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py), or reference [`lineage_backend_taskflow_demo.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py) if you're using the [TaskFlow API](https://airflow.apache.org/docs/apache-airflow/stable/concepts/taskflow.html). -5. [optional] Learn more about [Airflow lineage](https://airflow.apache.org/docs/apache-airflow/stable/lineage.html), including shorthand notation and some automation. - -## Emitting lineage via a separate operator - -Take a look at this sample DAG: +## Debugging -- [`lineage_emission_dag.py`](../../metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py) - emits lineage using the DatahubEmitterOperator. +### Missing lineage -In order to use this example, you must first configure the Datahub hook. Like in ingestion, we support a Datahub REST hook and a Kafka-based hook. See step 1 above for details. +If you're not seeing lineage in DataHub, check the following: -## Debugging +- Validate that the plugin is loaded in Airflow. Go to Admin -> Plugins and check that the DataHub plugin is listed. +- If using the v2 plugin's automatic lineage, ensure that the `enable_extractors` config is set to true and that automatic lineage is supported for your operator. +- If using manual lineage annotation, ensure that you're using the `datahub_airflow_plugin.entities.Dataset` or `datahub_airflow_plugin.entities.Urn` classes for your inlets and outlets. ### Incorrect URLs @@ -179,9 +202,21 @@ If your URLs aren't being generated correctly (usually they'll start with `http: ```ini title="airflow.cfg" [webserver] -base_url = http://airflow.example.com +base_url = http://airflow.mycorp.example.com ``` +## Compatibility + +We no longer officially support Airflow <2.1. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow. +Both of these options support Python 3.7+. + +- Airflow 1.10.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.9.1.0. +- Airflow 2.0.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.11.0.1. + +DataHub also previously supported an Airflow [lineage backend](https://airflow.apache.org/docs/apache-airflow/2.2.0/lineage.html#lineage-backend) implementation. While the implementation is still in our codebase, it is deprecated and will be removed in a future release. 
+Note that the lineage backend did not support automatic lineage extraction, did not capture task failures, and did not work in AWS MWAA. +The [documentation for the lineage backend](https://docs-website-1wmaehubl-acryldata.vercel.app/docs/lineage/airflow/#using-datahubs-airflow-lineage-backend-deprecated) has already been archived. + ## Additional references Related Datahub videos: diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 58a2bc9e670e3..dacf12dc020df 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -10,6 +10,13 @@ ext { if (!project.hasProperty("extra_pip_requirements")) { ext.extra_pip_requirements = "" } +if (!project.hasProperty("extra_pip_extras")) { + ext.extra_pip_extras = "plugin-v2" +} +// If extra_pip_extras is non-empty, we need to add a comma to the beginning of the string. +if (extra_pip_extras != "") { + ext.extra_pip_extras = "," + extra_pip_extras +} def pip_install_command = "${venv_name}/bin/pip install -e ../../metadata-ingestion" @@ -36,7 +43,7 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti // and https://github.com/datahub-project/datahub/pull/8435. commandLine 'bash', '-x', '-c', "${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + - "${pip_install_command} -e . ${extra_pip_requirements} &&" + + "${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" + "touch ${sentinel_file}" } @@ -47,7 +54,7 @@ task installDev(type: Exec, dependsOn: [install]) { inputs.file file('setup.py') outputs.file("${sentinel_file}") commandLine 'bash', '-x', '-c', - "${pip_install_command} -e .[dev] ${extra_pip_requirements} && " + + "${pip_install_command} -e .[dev${extra_pip_extras}] ${extra_pip_requirements} && " + "touch ${sentinel_file}" } @@ -79,7 +86,8 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { outputs.dir("${venv_name}") outputs.file("${sentinel_file}") commandLine 'bash', '-x', '-c', - "${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}" + "${pip_install_command} -e .[dev,integration-tests${extra_pip_extras}] ${extra_pip_requirements} && " + + "touch ${sentinel_file}" } def testFile = hasProperty('testFile') ? testFile : 'unknown' @@ -97,20 +105,13 @@ task testSingle(dependsOn: [installDevTest]) { } task testQuick(type: Exec, dependsOn: installDevTest) { - // We can't enforce the coverage requirements if we run a subset of the tests. 
inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) - outputs.dir("${venv_name}") commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } -task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" -} - task cleanPythonCache(type: Exec) { commandLine 'bash', '-c', "find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete" diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml index fba81486b9f67..648040c1951db 100644 --- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml +++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml @@ -12,6 +12,7 @@ include = '\.pyi?$' [tool.isort] indent = ' ' +known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat'] profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' diff --git a/metadata-ingestion-modules/airflow-plugin/setup.cfg b/metadata-ingestion-modules/airflow-plugin/setup.cfg index 157bcce1c298d..c25256c5751b8 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.cfg +++ b/metadata-ingestion-modules/airflow-plugin/setup.cfg @@ -41,29 +41,29 @@ ignore_missing_imports = no [tool:pytest] asyncio_mode = auto -addopts = --cov=src --cov-report term-missing --cov-config setup.cfg --strict-markers +addopts = --cov=src --cov-report='' --cov-config setup.cfg --strict-markers -s -v +markers = + integration: marks tests to only run in integration (deselect with '-m "not integration"') testpaths = tests/unit tests/integration -[coverage:run] -# Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, -# and tox interact, we should not uncomment the following line. -# See https://pytest-cov.readthedocs.io/en/latest/config.html and -# https://coverage.readthedocs.io/en/coverage-5.0/config.html. -# We also have some additional pytest/cov config options in tox.ini. -# source = src +# [coverage:run] +# # Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, +# # and tox interact, we should not uncomment the following line. +# # See https://pytest-cov.readthedocs.io/en/latest/config.html and +# # https://coverage.readthedocs.io/en/coverage-5.0/config.html. +# # We also have some additional pytest/cov config options in tox.ini. +# # source = src -[coverage:paths] -# This is necessary for tox-based coverage to be counted properly. -source = - src - */site-packages +# [coverage:paths] +# # This is necessary for tox-based coverage to be counted properly. +# source = +# src +# */site-packages [coverage:report] -# The fail_under value ensures that at least some coverage data is collected. -# We override its value in the tox config. 
show_missing = true exclude_lines = pragma: no cover diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 47069f59c314d..a5af881022d8c 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -1,5 +1,6 @@ import os import pathlib +from typing import Dict, Set import setuptools @@ -13,23 +14,43 @@ def get_long_description(): return pathlib.Path(os.path.join(root, "README.md")).read_text() +_version = package_metadata["__version__"] +_self_pin = f"=={_version}" if not _version.endswith("dev0") else "" + + rest_common = {"requests", "requests_file"} base_requirements = { # Compatibility. "dataclasses>=0.6; python_version < '3.7'", - # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to Airflow 2.0.2 dependency conflict - "typing_extensions>=3.7.4.3 ; python_version < '3.8'", - "typing_extensions>=3.10.0.2,<4.6.0 ; python_version >= '3.8'", "mypy_extensions>=0.4.3", # Actual dependencies. - "typing-inspect", "pydantic>=1.5.1", "apache-airflow >= 2.0.2", *rest_common, - f"acryl-datahub == {package_metadata['__version__']}", } +plugins: Dict[str, Set[str]] = { + "datahub-rest": { + f"acryl-datahub[datahub-rest]{_self_pin}", + }, + "datahub-kafka": { + f"acryl-datahub[datahub-kafka]{_self_pin}", + }, + "datahub-file": { + f"acryl-datahub[sync-file-emitter]{_self_pin}", + }, + "plugin-v1": set(), + "plugin-v2": { + # The v2 plugin requires Python 3.8+. + f"acryl-datahub[sql-parser]{_self_pin}", + "openlineage-airflow==1.2.0; python_version >= '3.8'", + }, +} + +# Include datahub-rest in the base requirements. +base_requirements.update(plugins["datahub-rest"]) + mypy_stubs = { "types-dataclasses", @@ -45,11 +66,9 @@ def get_long_description(): # versions 0.1.13 and 0.1.14 seem to have issues "types-click==0.1.12", "types-tabulate", - # avrogen package requires this - "types-pytz", } -base_dev_requirements = { +dev_requirements = { *base_requirements, *mypy_stubs, "black==22.12.0", @@ -66,6 +85,7 @@ def get_long_description(): "pytest-cov>=2.8.1", "tox", "deepdiff", + "tenacity", "requests-mock", "freezegun", "jsonpickle", @@ -74,8 +94,24 @@ def get_long_description(): "packaging", } -dev_requirements = { - *base_dev_requirements, +integration_test_requirements = { + *dev_requirements, + *plugins["datahub-file"], + *plugins["datahub-kafka"], + f"acryl-datahub[testing-utils]{_self_pin}", + # Extra requirements for loading our test dags. + "apache-airflow[snowflake]>=2.0.2", + # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 + # Eventually we want to set this to "snowflake-sqlalchemy>=1.4.3". + # However, that doesn't work with older versions of Airflow. Instead + # of splitting this into integration-test-old and integration-test-new, + # adding a bound to SQLAlchemy was the simplest solution. + "sqlalchemy<1.4.42", + # To avoid https://github.com/snowflakedb/snowflake-connector-python/issues/1188, + # we need https://github.com/snowflakedb/snowflake-connector-python/pull/1193 + "snowflake-connector-python>=2.7.10", + "virtualenv", # needed by PythonVirtualenvOperator + "apache-airflow-providers-sqlite", } @@ -88,7 +124,7 @@ def get_long_description(): setuptools.setup( # Package metadata. 
name=package_metadata["__package_name__"], - version=package_metadata["__version__"], + version=_version, url="https://datahubproject.io/", project_urls={ "Documentation": "https://datahubproject.io/docs/", @@ -131,17 +167,8 @@ def get_long_description(): # Dependencies. install_requires=list(base_requirements), extras_require={ + **{plugin: list(dependencies) for plugin, dependencies in plugins.items()}, "dev": list(dev_requirements), - "datahub-kafka": [ - f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}" - ], - "integration-tests": [ - f"acryl-datahub[datahub-kafka] == {package_metadata['__version__']}", - # Extra requirements for Airflow. - "apache-airflow[snowflake]>=2.0.2", # snowflake is used in example dags - # Because of https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 we need to restrict SQLAlchemy's max version. - "SQLAlchemy<1.4.42", - "virtualenv", # needed by PythonVirtualenvOperator - ], + "integration-tests": list(integration_test_requirements), }, ) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py index 5ad20e1f72551..10f014fbd586f 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_airflow_shims.py @@ -1,3 +1,7 @@ +from typing import List + +import airflow.version +import packaging.version from airflow.models.baseoperator import BaseOperator from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED @@ -21,7 +25,35 @@ assert AIRFLOW_PATCHED +# Approach suggested by https://stackoverflow.com/a/11887885/5004662. +AIRFLOW_VERSION = packaging.version.parse(airflow.version.version) +HAS_AIRFLOW_STANDALONE_CMD = AIRFLOW_VERSION >= packaging.version.parse("2.2.0.dev0") +HAS_AIRFLOW_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.3.0.dev0") +HAS_AIRFLOW_DAG_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5.0.dev0") + + +def get_task_inlets(operator: "Operator") -> List: + # From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets + if hasattr(operator, "_inlets"): + return operator._inlets # type: ignore[attr-defined, union-attr] + if hasattr(operator, "get_inlet_defs"): + return operator.get_inlet_defs() # type: ignore[attr-defined] + return operator.inlets + + +def get_task_outlets(operator: "Operator") -> List: + # From Airflow 2.4 _outlets is dropped and inlets used consistently. 
Earlier it was not the case, so we have to stick there to _outlets + # We have to use _outlets because outlets is empty in Airflow < 2.4.0 + if hasattr(operator, "_outlets"): + return operator._outlets # type: ignore[attr-defined, union-attr] + if hasattr(operator, "get_outlet_defs"): + return operator.get_outlet_defs() + return operator.outlets + + __all__ = [ + "AIRFLOW_VERSION", + "HAS_AIRFLOW_LISTENER_API", "Operator", "MappedOperator", "EmptyOperator", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py new file mode 100644 index 0000000000000..67843da2ba995 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -0,0 +1,80 @@ +from typing import TYPE_CHECKING, Optional + +import datahub.emitter.mce_builder as builder +from airflow.configuration import conf +from datahub.configuration.common import ConfigModel + +if TYPE_CHECKING: + from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook + + +class DatahubLineageConfig(ConfigModel): + # This class is shared between the lineage backend and the Airflow plugin. + # The defaults listed here are only relevant for the lineage backend. + # The Airflow plugin's default values come from the fallback values in + # the get_lineage_config() function below. + + enabled: bool = True + + # DataHub hook connection ID. + datahub_conn_id: str + + # Cluster to associate with the pipelines and tasks. Defaults to "prod". + cluster: str = builder.DEFAULT_FLOW_CLUSTER + + # If true, the owners field of the DAG will be capture as a DataHub corpuser. + capture_ownership_info: bool = True + + # If true, the tags field of the DAG will be captured as DataHub tags. + capture_tags_info: bool = True + + capture_executions: bool = False + + enable_extractors: bool = True + + log_level: Optional[str] = None + debug_emitter: bool = False + + disable_openlineage_plugin: bool = True + + # Note that this field is only respected by the lineage backend. + # The Airflow plugin behaves as if it were set to True. + graceful_exceptions: bool = True + + def make_emitter_hook(self) -> "DatahubGenericHook": + # This is necessary to avoid issues with circular imports. 
+ from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook + + return DatahubGenericHook(self.datahub_conn_id) + + +def get_lineage_config() -> DatahubLineageConfig: + """Load the DataHub plugin config from airflow.cfg.""" + + enabled = conf.get("datahub", "enabled", fallback=True) + datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default") + cluster = conf.get("datahub", "cluster", fallback=builder.DEFAULT_FLOW_CLUSTER) + capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True) + capture_ownership_info = conf.get( + "datahub", "capture_ownership_info", fallback=True + ) + capture_executions = conf.get("datahub", "capture_executions", fallback=True) + enable_extractors = conf.get("datahub", "enable_extractors", fallback=True) + log_level = conf.get("datahub", "log_level", fallback=None) + debug_emitter = conf.get("datahub", "debug_emitter", fallback=False) + disable_openlineage_plugin = conf.get( + "datahub", "disable_openlineage_plugin", fallback=True + ) + + return DatahubLineageConfig( + enabled=enabled, + datahub_conn_id=datahub_conn_id, + cluster=cluster, + capture_ownership_info=capture_ownership_info, + capture_tags_info=capture_tags_info, + capture_executions=capture_executions, + enable_extractors=enable_extractors, + log_level=log_level, + debug_emitter=debug_emitter, + disable_openlineage_plugin=disable_openlineage_plugin, + ) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py new file mode 100644 index 0000000000000..f39d37b122228 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py @@ -0,0 +1,7 @@ +from datahub_airflow_plugin.datahub_listener import get_airflow_plugin_listener + +_listener = get_airflow_plugin_listener() +if _listener: + on_task_instance_running = _listener.on_task_instance_running + on_task_instance_success = _listener.on_task_instance_success + on_task_instance_failed = _listener.on_task_instance_failed diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py new file mode 100644 index 0000000000000..7d35791bf1db4 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -0,0 +1,23 @@ +import logging + +import datahub.emitter.mce_builder as builder +from openlineage.client.run import Dataset as OpenLineageDataset + +logger = logging.getLogger(__name__) + + +OL_SCHEME_TWEAKS = { + "sqlserver": "mssql", + "trino": "presto", + "awsathena": "athena", +} + + +def translate_ol_to_datahub_urn(ol_uri: OpenLineageDataset) -> str: + namespace = ol_uri.namespace + name = ol_uri.name + + scheme, *rest = namespace.split("://", maxsplit=1) + + platform = OL_SCHEME_TWEAKS.get(scheme, scheme) + return builder.make_dataset_urn(platform=platform, name=name) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py new file mode 100644 index 0000000000000..f84b7b56f6119 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -0,0 +1,244 @@ +import contextlib +import logging +import unittest.mock +from typing import TYPE_CHECKING, Optional 
+ +import datahub.emitter.mce_builder as builder +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) +from datahub.utilities.sqlglot_lineage import ( + SqlParsingResult, + create_lineage_sql_parsed_result, +) +from openlineage.airflow.extractors import BaseExtractor +from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager +from openlineage.airflow.extractors import TaskMetadata +from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor +from openlineage.airflow.extractors.sql_extractor import SqlExtractor +from openlineage.airflow.utils import get_operator_class, try_import_from_string +from openlineage.client.facet import ( + ExtractionError, + ExtractionErrorRunFacet, + SqlJobFacet, +) + +from datahub_airflow_plugin._airflow_shims import Operator +from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS + +if TYPE_CHECKING: + from airflow.models import DagRun, TaskInstance + from datahub.ingestion.graph.client import DataHubGraph + +logger = logging.getLogger(__name__) +_DATAHUB_GRAPH_CONTEXT_KEY = "datahub_graph" +SQL_PARSING_RESULT_KEY = "datahub_sql" + + +class ExtractorManager(OLExtractorManager): + # TODO: On Airflow 2.7, the OLExtractorManager is part of the built-in Airflow API. + # When available, we should use that instead. The same goe for most of the OL + # extractors. + + def __init__(self): + super().__init__() + + _sql_operator_overrides = [ + # The OL BigQuery extractor has some complex logic to fetch detect + # the BigQuery job_id and fetch lineage from there. However, it can't + # generate CLL, so we disable it and use our own extractor instead. + "BigQueryOperator", + "BigQueryExecuteQueryOperator", + # Athena also does something similar. + "AthenaOperator", + "AWSAthenaOperator", + # Additional types that OL doesn't support. This is only necessary because + # on older versions of Airflow, these operators don't inherit from SQLExecuteQueryOperator. + "SqliteOperator", + ] + for operator in _sql_operator_overrides: + self.task_to_extractor.extractors[operator] = GenericSqlExtractor + + self._graph: Optional["DataHubGraph"] = None + + @contextlib.contextmanager + def _patch_extractors(self): + with contextlib.ExitStack() as stack: + # Patch the SqlExtractor.extract() method. + stack.enter_context( + unittest.mock.patch.object( + SqlExtractor, + "extract", + _sql_extractor_extract, + ) + ) + + # Patch the SnowflakeExtractor.default_schema property. + stack.enter_context( + unittest.mock.patch.object( + SnowflakeExtractor, + "default_schema", + property(snowflake_default_schema), + ) + ) + + # TODO: Override the BigQuery extractor to use the DataHub SQL parser. + # self.extractor_manager.add_extractor() + + # TODO: Override the Athena extractor to use the DataHub SQL parser. + + yield + + def extract_metadata( + self, + dagrun: "DagRun", + task: "Operator", + complete: bool = False, + task_instance: Optional["TaskInstance"] = None, + task_uuid: Optional[str] = None, + graph: Optional["DataHubGraph"] = None, + ) -> TaskMetadata: + self._graph = graph + with self._patch_extractors(): + return super().extract_metadata( + dagrun, task, complete, task_instance, task_uuid + ) + + def _get_extractor(self, task: "Operator") -> Optional[BaseExtractor]: + # By adding this, we can use the generic extractor as a fallback for + # any operator that inherits from SQLExecuteQueryOperator. 
+ clazz = get_operator_class(task) + SQLExecuteQueryOperator = try_import_from_string( + "airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator" + ) + if SQLExecuteQueryOperator and issubclass(clazz, SQLExecuteQueryOperator): + self.task_to_extractor.extractors.setdefault( + clazz.__name__, GenericSqlExtractor + ) + + extractor = super()._get_extractor(task) + if extractor: + extractor.set_context(_DATAHUB_GRAPH_CONTEXT_KEY, self._graph) + return extractor + + +class GenericSqlExtractor(SqlExtractor): + # Note that the extract() method is patched elsewhere. + + @property + def default_schema(self): + return super().default_schema + + def _get_scheme(self) -> Optional[str]: + # Best effort conversion to DataHub platform names. + + with contextlib.suppress(Exception): + if self.hook: + if hasattr(self.hook, "get_uri"): + uri = self.hook.get_uri() + return get_platform_from_sqlalchemy_uri(uri) + + return self.conn.conn_type or super().dialect + + def _get_database(self) -> Optional[str]: + if self.conn: + # For BigQuery, the "database" is the project name. + if hasattr(self.conn, "project_id"): + return self.conn.project_id + + return self.conn.schema + return None + + +def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata: + # Why not override the OL sql_parse method directly, instead of overriding + # extract()? A few reasons: + # + # 1. We would want to pass the default_db and graph instance into our sql parser + # method. The OL code doesn't pass the default_db (despite having it available), + # and it's not clear how to get the graph instance into that method. + # 2. OL has some janky logic to fetch table schemas as part of the sql extractor. + # We don't want that behavior and this lets us disable it. + # 3. Our SqlParsingResult already has DataHub urns, whereas using SqlMeta would + # require us to convert those urns to OL uris, just for them to get converted + # back to urns later on in our processing. + + task_name = f"{self.operator.dag_id}.{self.operator.task_id}" + sql = self.operator.sql + + run_facets = {} + job_facets = {"sql": SqlJobFacet(query=self._normalize_sql(sql))} + + # Prepare to run the SQL parser. + graph = self.context.get(_DATAHUB_GRAPH_CONTEXT_KEY, None) + + default_database = getattr(self.operator, "database", None) + if not default_database: + default_database = self.database + default_schema = self.default_schema + + # TODO: Add better handling for sql being a list of statements. + if isinstance(sql, list): + logger.info(f"Got list of SQL statements for {task_name}. Using first one.") + sql = sql[0] + + # Run the SQL parser. 
+ scheme = self.scheme + platform = OL_SCHEME_TWEAKS.get(scheme, scheme) + self.log.debug( + "Running the SQL parser %s (platform=%s, default db=%s, schema=%s): %s", + "with graph client" if graph else "in offline mode", + platform, + default_database, + default_schema, + sql, + ) + sql_parsing_result: SqlParsingResult = create_lineage_sql_parsed_result( + query=sql, + graph=graph, + platform=platform, + platform_instance=None, + env=builder.DEFAULT_ENV, + database=default_database, + schema=default_schema, + ) + self.log.debug(f"Got sql lineage {sql_parsing_result}") + + if sql_parsing_result.debug_info.error: + error = sql_parsing_result.debug_info.error + run_facets["extractionError"] = ExtractionErrorRunFacet( + totalTasks=1, + failedTasks=1, + errors=[ + ExtractionError( + errorMessage=str(error), + stackTrace=None, + task="datahub_sql_parser", + taskNumber=None, + ) + ], + ) + + # Save sql_parsing_result to the facets dict. It is removed from the + # facet dict in the extractor's processing logic. + run_facets[SQL_PARSING_RESULT_KEY] = sql_parsing_result # type: ignore + + return TaskMetadata( + name=task_name, + inputs=[], + outputs=[], + run_facets=run_facets, + job_facets=job_facets, + ) + + +def snowflake_default_schema(self: "SnowflakeExtractor") -> Optional[str]: + if hasattr(self.operator, "schema") and self.operator.schema is not None: + return self.operator.schema + return ( + self.conn.extra_dejson.get("extra__snowflake__schema", "") + or self.conn.extra_dejson.get("schema", "") + or self.conn.schema + ) + # TODO: Should we try a fallback of: + # execute_query_on_hook(self.hook, "SELECT current_schema();")[0][0] diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index b5e86e14d85d0..16585f70e820b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -1,4 +1,5 @@ -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union, cast +from datetime import datetime +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast from airflow.configuration import conf from datahub.api.entities.datajob import DataFlow, DataJob @@ -6,6 +7,7 @@ DataProcessInstance, InstanceRunResult, ) +from datahub.emitter.generic_emitter import Emitter from datahub.metadata.schema_classes import DataProcessTypeClass from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn @@ -17,8 +19,6 @@ if TYPE_CHECKING: from airflow import DAG from airflow.models import DagRun, TaskInstance - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub_airflow_plugin._airflow_shims import Operator @@ -91,7 +91,7 @@ def _get_dependencies( ) # if the task triggers the subdag, link it to this node in the subdag - if subdag_task_id in _task_downstream_task_ids(upstream_task): + if subdag_task_id in sorted(_task_downstream_task_ids(upstream_task)): upstream_subdag_triggers.append(upstream_task_urn) # If the operator is an ExternalTaskSensor then we set the remote task as upstream. 
@@ -143,7 +143,7 @@ def generate_dataflow( """ id = dag.dag_id orchestrator = "airflow" - description = f"{dag.description}\n\n{dag.doc_md or ''}" + description = "\n\n".join(filter(None, [dag.description, dag.doc_md])) or None data_flow = DataFlow( env=cluster, id=id, orchestrator=orchestrator, description=description ) @@ -153,7 +153,7 @@ def generate_dataflow( allowed_flow_keys = [ "_access_control", "_concurrency", - "_default_view", + # "_default_view", "catchup", "fileloc", "is_paused_upon_creation", @@ -171,7 +171,7 @@ def generate_dataflow( data_flow.url = f"{base_url}/tree?dag_id={dag.dag_id}" if capture_owner and dag.owner: - data_flow.owners.add(dag.owner) + data_flow.owners.update(owner.strip() for owner in dag.owner.split(",")) if capture_tags and dag.tags: data_flow.tags.update(dag.tags) @@ -227,10 +227,7 @@ def generate_datajob( job_property_bag: Dict[str, str] = {} - allowed_task_keys = [ - "_downstream_task_ids", - "_inlets", - "_outlets", + allowed_task_keys: List[Union[str, Tuple[str, ...]]] = [ "_task_type", "_task_module", "depends_on_past", @@ -243,15 +240,28 @@ def generate_datajob( "trigger_rule", "wait_for_downstream", # In Airflow 2.3, _downstream_task_ids was renamed to downstream_task_ids - "downstream_task_ids", + ("downstream_task_ids", "_downstream_task_ids"), # In Airflow 2.4, _inlets and _outlets were removed in favor of non-private versions. - "inlets", - "outlets", + ("inlets", "_inlets"), + ("outlets", "_outlets"), ] for key in allowed_task_keys: - if hasattr(task, key): - job_property_bag[key] = repr(getattr(task, key)) + if isinstance(key, tuple): + out_key: str = key[0] + try_keys = key + else: + out_key = key + try_keys = (key,) + + for k in try_keys: + if hasattr(task, k): + v = getattr(task, k) + if out_key == "downstream_task_ids": + # Generate these in a consistent order. + v = list(sorted(v)) + job_property_bag[out_key] = repr(v) + break datajob.properties = job_property_bag base_url = conf.get("webserver", "base_url") @@ -288,7 +298,7 @@ def create_datajob_instance( @staticmethod def run_dataflow( - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, cluster: str, dag_run: "DagRun", start_timestamp_millis: Optional[int] = None, @@ -340,7 +350,7 @@ def run_dataflow( @staticmethod def complete_dataflow( - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, cluster: str, dag_run: "DagRun", end_timestamp_millis: Optional[int] = None, @@ -348,7 +358,7 @@ def complete_dataflow( ) -> None: """ - :param emitter: DatahubRestEmitter - the datahub rest emitter to emit the generated mcps + :param emitter: Emitter - the datahub emitter to emit the generated mcps :param cluster: str - name of the cluster :param dag_run: DagRun :param end_timestamp_millis: Optional[int] - the completion time in milliseconds if not set the current time will be used. 
@@ -386,7 +396,7 @@ def complete_dataflow( @staticmethod def run_datajob( - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, cluster: str, ti: "TaskInstance", dag: "DAG", @@ -413,16 +423,13 @@ def run_datajob( job_property_bag["end_date"] = str(ti.end_date) job_property_bag["execution_date"] = str(ti.execution_date) job_property_bag["try_number"] = str(ti.try_number - 1) - job_property_bag["hostname"] = str(ti.hostname) job_property_bag["max_tries"] = str(ti.max_tries) # Not compatible with Airflow 1 if hasattr(ti, "external_executor_id"): job_property_bag["external_executor_id"] = str(ti.external_executor_id) - job_property_bag["pid"] = str(ti.pid) job_property_bag["state"] = str(ti.state) job_property_bag["operator"] = str(ti.operator) job_property_bag["priority_weight"] = str(ti.priority_weight) - job_property_bag["unixname"] = str(ti.unixname) job_property_bag["log_url"] = ti.log_url dpi.properties.update(job_property_bag) dpi.url = ti.log_url @@ -442,8 +449,10 @@ def run_datajob( dpi.type = DataProcessTypeClass.BATCH_AD_HOC if start_timestamp_millis is None: - assert ti.start_date - start_timestamp_millis = int(ti.start_date.timestamp() * 1000) + if ti.start_date: + start_timestamp_millis = int(ti.start_date.timestamp() * 1000) + else: + start_timestamp_millis = int(datetime.now().timestamp() * 1000) if attempt is None: attempt = ti.try_number @@ -458,7 +467,7 @@ def run_datajob( @staticmethod def complete_datajob( - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, cluster: str, ti: "TaskInstance", dag: "DAG", @@ -469,7 +478,7 @@ def complete_datajob( ) -> DataProcessInstance: """ - :param emitter: DatahubRestEmitter + :param emitter: Emitter - the datahub emitter to emit the generated mcps :param cluster: str :param ti: TaskInstance :param dag: DAG @@ -483,8 +492,10 @@ def complete_datajob( datajob = AirflowGenerator.generate_datajob(cluster, ti.task, dag) if end_timestamp_millis is None: - assert ti.end_date - end_timestamp_millis = int(ti.end_date.timestamp() * 1000) + if ti.end_date: + end_timestamp_millis = int(ti.end_date.timestamp() * 1000) + else: + end_timestamp_millis = int(datetime.now().timestamp() * 1000) if result is None: # We should use TaskInstanceState but it is not available in Airflow 1 diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py new file mode 100644 index 0000000000000..a3f5cb489e29f --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -0,0 +1,494 @@ +import copy +import functools +import logging +import threading +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast + +import airflow +import datahub.emitter.mce_builder as builder +from datahub.api.entities.datajob import DataJob +from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.schema_classes import ( + FineGrainedLineageClass, + FineGrainedLineageDownstreamTypeClass, + FineGrainedLineageUpstreamTypeClass, +) +from datahub.telemetry import telemetry +from datahub.utilities.sqlglot_lineage import SqlParsingResult +from datahub.utilities.urns.dataset_urn import DatasetUrn +from openlineage.airflow.listener import TaskHolder +from 
openlineage.airflow.utils import redact_with_exclusions +from openlineage.client.serde import Serde + +from datahub_airflow_plugin._airflow_shims import ( + HAS_AIRFLOW_DAG_LISTENER_API, + Operator, + get_task_inlets, + get_task_outlets, +) +from datahub_airflow_plugin._config import DatahubLineageConfig, get_lineage_config +from datahub_airflow_plugin._datahub_ol_adapter import translate_ol_to_datahub_urn +from datahub_airflow_plugin._extractors import SQL_PARSING_RESULT_KEY, ExtractorManager +from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator +from datahub_airflow_plugin.entities import _Entity + +_F = TypeVar("_F", bound=Callable[..., None]) +if TYPE_CHECKING: + from airflow.models import DAG, DagRun, TaskInstance + from sqlalchemy.orm import Session + + # To placate mypy on Airflow versions that don't have the listener API, + # we define a dummy hookimpl that's an identity function. + + def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811 + return f + +else: + from airflow.listeners import hookimpl + +logger = logging.getLogger(__name__) + +_airflow_listener_initialized = False +_airflow_listener: Optional["DataHubListener"] = None +_RUN_IN_THREAD = True +_RUN_IN_THREAD_TIMEOUT = 30 + + +def get_airflow_plugin_listener() -> Optional["DataHubListener"]: + # Using globals instead of functools.lru_cache to make testing easier. + global _airflow_listener_initialized + global _airflow_listener + + if not _airflow_listener_initialized: + _airflow_listener_initialized = True + + plugin_config = get_lineage_config() + + if plugin_config.enabled: + _airflow_listener = DataHubListener(config=plugin_config) + + if plugin_config.disable_openlineage_plugin: + # Deactivate the OpenLineagePlugin listener to avoid conflicts. + from openlineage.airflow.plugin import OpenLineagePlugin + + OpenLineagePlugin.listeners = [] + + telemetry.telemetry_instance.ping( + "airflow-plugin-init", + { + "airflow-version": airflow.__version__, + "datahub-airflow-plugin": "v2", + "datahub-airflow-plugin-dag-events": HAS_AIRFLOW_DAG_LISTENER_API, + "capture_executions": plugin_config.capture_executions, + "capture_tags": plugin_config.capture_tags_info, + "capture_ownership": plugin_config.capture_ownership_info, + "enable_extractors": plugin_config.enable_extractors, + "disable_openlineage_plugin": plugin_config.disable_openlineage_plugin, + }, + ) + return _airflow_listener + + +def run_in_thread(f: _F) -> _F: + # This is also responsible for catching exceptions and logging them. + + @functools.wraps(f) + def wrapper(*args, **kwargs): + try: + if _RUN_IN_THREAD: + # A poor-man's timeout mechanism. + # This ensures that we don't hang the task if the extractors + # are slow or the DataHub API is slow to respond. + + thread = threading.Thread( + target=f, args=args, kwargs=kwargs, daemon=True + ) + thread.start() + + thread.join(timeout=_RUN_IN_THREAD_TIMEOUT) + if thread.is_alive(): + logger.warning( + f"Thread for {f.__name__} is still running after {_RUN_IN_THREAD_TIMEOUT} seconds. " + "Continuing without waiting for it to finish." 
+ ) + else: + f(*args, **kwargs) + except Exception as e: + logger.exception(e) + + return cast(_F, wrapper) + + +class DataHubListener: + __name__ = "DataHubListener" + + def __init__(self, config: DatahubLineageConfig): + self.config = config + self._set_log_level() + + self._emitter = config.make_emitter_hook().make_emitter() + self._graph: Optional[DataHubGraph] = None + logger.info(f"DataHub plugin using {repr(self._emitter)}") + + # See discussion here https://github.com/OpenLineage/OpenLineage/pull/508 for + # why we need to keep track of tasks ourselves. + self._task_holder = TaskHolder() + + # In our case, we also want to cache the initial datajob object + # so that we can add to it when the task completes. + self._datajob_holder: Dict[str, DataJob] = {} + + self.extractor_manager = ExtractorManager() + + # This "inherits" from types.ModuleType to avoid issues with Airflow's listener plugin loader. + # It previously (v2.4.x and likely other versions too) would throw errors if it was not a module. + # https://github.com/apache/airflow/blob/e99a518970b2d349a75b1647f6b738c8510fa40e/airflow/listeners/listener.py#L56 + # self.__class__ = types.ModuleType + + @property + def emitter(self): + return self._emitter + + @property + def graph(self) -> Optional[DataHubGraph]: + if self._graph: + return self._graph + + if isinstance(self._emitter, DatahubRestEmitter) and not isinstance( + self._emitter, DataHubGraph + ): + # This is lazy initialized to avoid throwing errors on plugin load. + self._graph = self._emitter.to_graph() + self._emitter = self._graph + + return self._graph + + def _set_log_level(self) -> None: + """Set the log level for the plugin and its dependencies. + + This may need to be called multiple times, since Airflow sometimes + messes with the logging configuration after the plugin is loaded. + In particular, the loggers may get changed when the worker starts + executing a task. + """ + + if self.config.log_level: + logging.getLogger(__name__.split(".")[0]).setLevel(self.config.log_level) + if self.config.debug_emitter: + logging.getLogger("datahub.emitter").setLevel(logging.DEBUG) + + def _make_emit_callback(self) -> Callable[[Optional[Exception], str], None]: + def emit_callback(err: Optional[Exception], msg: str) -> None: + if err: + logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err) + + return emit_callback + + def _extract_lineage( + self, + datajob: DataJob, + dagrun: "DagRun", + task: "Operator", + task_instance: "TaskInstance", + complete: bool = False, + ) -> None: + """ + Combine lineage (including column lineage) from task inlets/outlets and + extractor-generated task_metadata and write it to the datajob. This + routine is also responsible for converting the lineage to DataHub URNs. + """ + + input_urns: List[str] = [] + output_urns: List[str] = [] + fine_grained_lineages: List[FineGrainedLineageClass] = [] + + task_metadata = None + if self.config.enable_extractors: + task_metadata = self.extractor_manager.extract_metadata( + dagrun, + task, + complete=complete, + task_instance=task_instance, + task_uuid=str(datajob.urn), + graph=self.graph, + ) + logger.debug(f"Got task metadata: {task_metadata}") + + # Translate task_metadata.inputs/outputs to DataHub URNs. + input_urns.extend( + translate_ol_to_datahub_urn(dataset) for dataset in task_metadata.inputs + ) + output_urns.extend( + translate_ol_to_datahub_urn(dataset) + for dataset in task_metadata.outputs + ) + + # Add DataHub-native SQL parser results. 
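+        # The extractor stashed the native SqlParsingResult under SQL_PARSING_RESULT_KEY
+        # (see _sql_extractor_extract). Pop it back out of the run facets here so that it
+        # is not written out as an "openlineage_run_facet_*" property below, and merge
+        # its tables and column lineage into the lineage lists instead.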
+ sql_parsing_result: Optional[SqlParsingResult] = None + if task_metadata: + sql_parsing_result = task_metadata.run_facets.pop( + SQL_PARSING_RESULT_KEY, None + ) + if sql_parsing_result: + if sql_parsing_result.debug_info.error: + datajob.properties["datahub_sql_parser_error"] = str( + sql_parsing_result.debug_info.error + ) + if not sql_parsing_result.debug_info.table_error: + input_urns.extend(sql_parsing_result.in_tables) + output_urns.extend(sql_parsing_result.out_tables) + + if sql_parsing_result.column_lineage: + fine_grained_lineages.extend( + FineGrainedLineageClass( + upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD, + upstreams=[ + builder.make_schema_field_urn( + upstream.table, upstream.column + ) + for upstream in column_lineage.upstreams + ], + downstreams=[ + builder.make_schema_field_urn( + downstream.table, downstream.column + ) + for downstream in [column_lineage.downstream] + if downstream.table + ], + ) + for column_lineage in sql_parsing_result.column_lineage + ) + + # Add DataHub-native inlets/outlets. + # These are filtered out by the extractor, so we need to add them manually. + input_urns.extend( + iolet.urn for iolet in get_task_inlets(task) if isinstance(iolet, _Entity) + ) + output_urns.extend( + iolet.urn for iolet in get_task_outlets(task) if isinstance(iolet, _Entity) + ) + + # Write the lineage to the datajob object. + datajob.inlets.extend(DatasetUrn.create_from_string(urn) for urn in input_urns) + datajob.outlets.extend( + DatasetUrn.create_from_string(urn) for urn in output_urns + ) + datajob.fine_grained_lineages.extend(fine_grained_lineages) + + # Merge in extra stuff that was present in the DataJob we constructed + # at the start of the task. + if complete: + original_datajob = self._datajob_holder.get(str(datajob.urn), None) + else: + self._datajob_holder[str(datajob.urn)] = datajob + original_datajob = None + + if original_datajob: + logger.debug("Merging start datajob into finish datajob") + datajob.inlets.extend(original_datajob.inlets) + datajob.outlets.extend(original_datajob.outlets) + datajob.fine_grained_lineages.extend(original_datajob.fine_grained_lineages) + + for k, v in original_datajob.properties.items(): + datajob.properties.setdefault(k, v) + + # Deduplicate inlets/outlets. + datajob.inlets = list(sorted(set(datajob.inlets), key=lambda x: str(x))) + datajob.outlets = list(sorted(set(datajob.outlets), key=lambda x: str(x))) + + # Write all other OL facets as DataHub properties. + if task_metadata: + for k, v in task_metadata.job_facets.items(): + datajob.properties[f"openlineage_job_facet_{k}"] = Serde.to_json( + redact_with_exclusions(v) + ) + + for k, v in task_metadata.run_facets.items(): + datajob.properties[f"openlineage_run_facet_{k}"] = Serde.to_json( + redact_with_exclusions(v) + ) + + @hookimpl + @run_in_thread + def on_task_instance_running( + self, + previous_state: None, + task_instance: "TaskInstance", + session: "Session", # This will always be QUEUED + ) -> None: + self._set_log_level() + + # This if statement mirrors the logic in https://github.com/OpenLineage/OpenLineage/pull/508. + if not hasattr(task_instance, "task"): + # The type ignore is to placate mypy on Airflow 2.1.x. 
+ logger.warning( + f"No task set for task_id: {task_instance.task_id} - " # type: ignore[attr-defined] + f"dag_id: {task_instance.dag_id} - run_id {task_instance.run_id}" # type: ignore[attr-defined] + ) + return + + logger.debug( + f"DataHub listener got notification about task instance start for {task_instance.task_id}" + ) + + # Render templates in a copy of the task instance. + # This is necessary to get the correct operator args in the extractors. + task_instance = copy.deepcopy(task_instance) + task_instance.render_templates() + + # The type ignore is to placate mypy on Airflow 2.1.x. + dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] + task = task_instance.task + dag: "DAG" = task.dag # type: ignore[assignment] + + self._task_holder.set_task(task_instance) + + # Handle async operators in Airflow 2.3 by skipping deferred state. + # Inspired by https://github.com/OpenLineage/OpenLineage/pull/1601 + if task_instance.next_method is not None: # type: ignore[attr-defined] + return + + # If we don't have the DAG listener API, we just pretend that + # the start of the task is the start of the DAG. + # This generates duplicate events, but it's better than not + # generating anything. + if not HAS_AIRFLOW_DAG_LISTENER_API: + self.on_dag_start(dagrun) + + datajob = AirflowGenerator.generate_datajob( + cluster=self.config.cluster, + task=task, + dag=dag, + capture_tags=self.config.capture_tags_info, + capture_owner=self.config.capture_ownership_info, + ) + + # TODO: Make use of get_task_location to extract github urls. + + # Add lineage info. + self._extract_lineage(datajob, dagrun, task, task_instance) + + # TODO: Add handling for Airflow mapped tasks using task_instance.map_index + + datajob.emit(self.emitter, callback=self._make_emit_callback()) + logger.debug(f"Emitted DataHub Datajob start: {datajob}") + + if self.config.capture_executions: + dpi = AirflowGenerator.run_datajob( + emitter=self.emitter, + cluster=self.config.cluster, + ti=task_instance, + dag=dag, + dag_run=dagrun, + datajob=datajob, + emit_templates=False, + ) + logger.debug(f"Emitted DataHub DataProcess Instance start: {dpi}") + + self.emitter.flush() + + logger.debug( + f"DataHub listener finished processing notification about task instance start for {task_instance.task_id}" + ) + + def on_task_instance_finish( + self, task_instance: "TaskInstance", status: InstanceRunResult + ) -> None: + dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] + task = self._task_holder.get_task(task_instance) or task_instance.task + dag: "DAG" = task.dag # type: ignore[assignment] + + datajob = AirflowGenerator.generate_datajob( + cluster=self.config.cluster, + task=task, + dag=dag, + capture_tags=self.config.capture_tags_info, + capture_owner=self.config.capture_ownership_info, + ) + + # Add lineage info. 
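+        # With complete=True, _extract_lineage also merges in the inlets, outlets, and
+        # properties captured when the task started (held in self._datajob_holder),
+        # so the finish-time datajob carries everything from the start event as well.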
+ self._extract_lineage(datajob, dagrun, task, task_instance, complete=True) + + datajob.emit(self.emitter, callback=self._make_emit_callback()) + logger.debug(f"Emitted DataHub Datajob finish w/ status {status}: {datajob}") + + if self.config.capture_executions: + dpi = AirflowGenerator.complete_datajob( + emitter=self.emitter, + cluster=self.config.cluster, + ti=task_instance, + dag=dag, + dag_run=dagrun, + datajob=datajob, + result=status, + ) + logger.debug( + f"Emitted DataHub DataProcess Instance with status {status}: {dpi}" + ) + + self.emitter.flush() + + @hookimpl + @run_in_thread + def on_task_instance_success( + self, previous_state: None, task_instance: "TaskInstance", session: "Session" + ) -> None: + self._set_log_level() + + logger.debug( + f"DataHub listener got notification about task instance success for {task_instance.task_id}" + ) + self.on_task_instance_finish(task_instance, status=InstanceRunResult.SUCCESS) + logger.debug( + f"DataHub listener finished processing task instance success for {task_instance.task_id}" + ) + + @hookimpl + @run_in_thread + def on_task_instance_failed( + self, previous_state: None, task_instance: "TaskInstance", session: "Session" + ) -> None: + self._set_log_level() + + logger.debug( + f"DataHub listener got notification about task instance failure for {task_instance.task_id}" + ) + + # TODO: Handle UP_FOR_RETRY state. + self.on_task_instance_finish(task_instance, status=InstanceRunResult.FAILURE) + logger.debug( + f"DataHub listener finished processing task instance failure for {task_instance.task_id}" + ) + + def on_dag_start(self, dag_run: "DagRun") -> None: + dag = dag_run.dag + if not dag: + return + + dataflow = AirflowGenerator.generate_dataflow( + cluster=self.config.cluster, + dag=dag, + capture_tags=self.config.capture_tags_info, + capture_owner=self.config.capture_ownership_info, + ) + dataflow.emit(self.emitter, callback=self._make_emit_callback()) + + if HAS_AIRFLOW_DAG_LISTENER_API: + + @hookimpl + @run_in_thread + def on_dag_run_running(self, dag_run: "DagRun", msg: str) -> None: + self._set_log_level() + + logger.debug( + f"DataHub listener got notification about dag run start for {dag_run.dag_id}" + ) + + self.on_dag_start(dag_run) + + self.emitter.flush() + + # TODO: Add hooks for on_dag_run_success, on_dag_run_failed -> call AirflowGenerator.complete_dataflow diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index d1cec9e5c1b54..c96fab31647f5 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -1,367 +1,74 @@ import contextlib import logging -import traceback -from typing import Any, Callable, Iterable, List, Optional, Union +import os -from airflow.configuration import conf -from airflow.lineage import PIPELINE_OUTLETS -from airflow.models.baseoperator import BaseOperator from airflow.plugins_manager import AirflowPlugin -from airflow.utils.module_loading import import_string -from cattr import structure -from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED -from datahub_airflow_plugin._airflow_shims import MappedOperator, Operator -from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator -from 
datahub_airflow_plugin.hooks.datahub import DatahubGenericHook -from datahub_airflow_plugin.lineage.datahub import DatahubLineageConfig +from datahub_airflow_plugin._airflow_shims import ( + HAS_AIRFLOW_DAG_LISTENER_API, + HAS_AIRFLOW_LISTENER_API, +) assert AIRFLOW_PATCHED logger = logging.getLogger(__name__) -TASK_ON_FAILURE_CALLBACK = "on_failure_callback" -TASK_ON_SUCCESS_CALLBACK = "on_success_callback" +_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv( + "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false" +).lower() in ("true", "1") -def get_lineage_config() -> DatahubLineageConfig: - """Load the lineage config from airflow.cfg.""" +if _USE_AIRFLOW_LISTENER_INTERFACE: + try: + from openlineage.airflow.utils import try_import_from_string # noqa: F401 + except ImportError: + # If v2 plugin dependencies are not installed, we fall back to v1. + logger.debug("Falling back to v1 plugin due to missing dependencies.") + _USE_AIRFLOW_LISTENER_INTERFACE = False - enabled = conf.get("datahub", "enabled", fallback=True) - datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default") - cluster = conf.get("datahub", "cluster", fallback="prod") - graceful_exceptions = conf.get("datahub", "graceful_exceptions", fallback=True) - capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True) - capture_ownership_info = conf.get( - "datahub", "capture_ownership_info", fallback=True - ) - capture_executions = conf.get("datahub", "capture_executions", fallback=True) - return DatahubLineageConfig( - enabled=enabled, - datahub_conn_id=datahub_conn_id, - cluster=cluster, - graceful_exceptions=graceful_exceptions, - capture_ownership_info=capture_ownership_info, - capture_tags_info=capture_tags_info, - capture_executions=capture_executions, - ) +with contextlib.suppress(Exception): + if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in ( + "true", + "1", + ): + # From https://github.com/apache/airflow/discussions/24463#discussioncomment-4404542 + # I'm not exactly sure why this fixes it, but I suspect it's that this + # forces the proxy settings to get cached before the fork happens. + # + # For more details, see https://github.com/python/cpython/issues/58037 + # and https://wefearchange.org/2018/11/forkmacos.rst.html + # and https://bugs.python.org/issue30385#msg293958 + # An alternative fix is to set NO_PROXY='*' -def _task_inlets(operator: "Operator") -> List: - # From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets - if hasattr(operator, "_inlets"): - return operator._inlets # type: ignore[attr-defined, union-attr] - return operator.inlets + from _scproxy import _get_proxy_settings + _get_proxy_settings() -def _task_outlets(operator: "Operator") -> List: - # From Airflow 2.4 _outlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _outlets - # We have to use _outlets because outlets is empty in Airflow < 2.4.0 - if hasattr(operator, "_outlets"): - return operator._outlets # type: ignore[attr-defined, union-attr] - return operator.outlets +class DatahubPlugin(AirflowPlugin): + name = "datahub_plugin" -def get_inlets_from_task(task: BaseOperator, context: Any) -> Iterable[Any]: - # TODO: Fix for https://github.com/apache/airflow/commit/1b1f3fabc5909a447a6277cafef3a0d4ef1f01ae - # in Airflow 2.4. 
- # TODO: ignore/handle airflow's dataset type in our lineage - - inlets: List[Any] = [] - task_inlets = _task_inlets(task) - # From Airflow 2.3 this should be AbstractOperator but due to compatibility reason lets use BaseOperator - if isinstance(task_inlets, (str, BaseOperator)): - inlets = [ - task_inlets, - ] - - if task_inlets and isinstance(task_inlets, list): - inlets = [] - task_ids = ( - {o for o in task_inlets if isinstance(o, str)} - .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator)) - .intersection(task.get_flat_relative_ids(upstream=True)) - ) - - from airflow.lineage import AUTO - - # pick up unique direct upstream task_ids if AUTO is specified - if AUTO.upper() in task_inlets or AUTO.lower() in task_inlets: - print("Picking up unique direct upstream task_ids as AUTO is specified") - task_ids = task_ids.union( - task_ids.symmetric_difference(task.upstream_task_ids) - ) - - inlets = task.xcom_pull( - context, task_ids=list(task_ids), dag_id=task.dag_id, key=PIPELINE_OUTLETS - ) - - # re-instantiate the obtained inlets - inlets = [ - structure(item["data"], import_string(item["type_name"])) - # _get_instance(structure(item, Metadata)) - for sublist in inlets - if sublist - for item in sublist - ] - - for inlet in task_inlets: - if not isinstance(inlet, str): - inlets.append(inlet) - - return inlets - - -def _make_emit_callback( - logger: logging.Logger, -) -> Callable[[Optional[Exception], str], None]: - def emit_callback(err: Optional[Exception], msg: str) -> None: - if err: - logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err) - - return emit_callback - - -def datahub_task_status_callback(context, status): - ti = context["ti"] - task: "BaseOperator" = ti.task - dag = context["dag"] - - # This code is from the original airflow lineage code -> - # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py - inlets = get_inlets_from_task(task, context) - - emitter = ( - DatahubGenericHook(context["_datahub_config"].datahub_conn_id) - .get_underlying_hook() - .make_emitter() - ) - - dataflow = AirflowGenerator.generate_dataflow( - cluster=context["_datahub_config"].cluster, - dag=dag, - capture_tags=context["_datahub_config"].capture_tags_info, - capture_owner=context["_datahub_config"].capture_ownership_info, - ) - task.log.info(f"Emitting Datahub Dataflow: {dataflow}") - dataflow.emit(emitter, callback=_make_emit_callback(task.log)) - - datajob = AirflowGenerator.generate_datajob( - cluster=context["_datahub_config"].cluster, - task=task, - dag=dag, - capture_tags=context["_datahub_config"].capture_tags_info, - capture_owner=context["_datahub_config"].capture_ownership_info, - ) - - for inlet in inlets: - datajob.inlets.append(inlet.urn) - - task_outlets = _task_outlets(task) - for outlet in task_outlets: - datajob.outlets.append(outlet.urn) - - task.log.info(f"Emitting Datahub Datajob: {datajob}") - datajob.emit(emitter, callback=_make_emit_callback(task.log)) - - if context["_datahub_config"].capture_executions: - dpi = AirflowGenerator.run_datajob( - emitter=emitter, - cluster=context["_datahub_config"].cluster, - ti=context["ti"], - dag=dag, - dag_run=context["dag_run"], - datajob=datajob, - start_timestamp_millis=int(ti.start_date.timestamp() * 1000), - ) - - task.log.info(f"Emitted Start Datahub Dataprocess Instance: {dpi}") - - dpi = AirflowGenerator.complete_datajob( - emitter=emitter, - cluster=context["_datahub_config"].cluster, - ti=context["ti"], - dag_run=context["dag_run"], - result=status, - dag=dag, - 
datajob=datajob, - end_timestamp_millis=int(ti.end_date.timestamp() * 1000), - ) - task.log.info(f"Emitted Completed Data Process Instance: {dpi}") - - emitter.flush() - - -def datahub_pre_execution(context): - ti = context["ti"] - task: "BaseOperator" = ti.task - dag = context["dag"] - - task.log.info("Running Datahub pre_execute method") - - emitter = ( - DatahubGenericHook(context["_datahub_config"].datahub_conn_id) - .get_underlying_hook() - .make_emitter() - ) - - # This code is from the original airflow lineage code -> - # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py - inlets = get_inlets_from_task(task, context) - - datajob = AirflowGenerator.generate_datajob( - cluster=context["_datahub_config"].cluster, - task=context["ti"].task, - dag=dag, - capture_tags=context["_datahub_config"].capture_tags_info, - capture_owner=context["_datahub_config"].capture_ownership_info, - ) - - for inlet in inlets: - datajob.inlets.append(inlet.urn) - - task_outlets = _task_outlets(task) - - for outlet in task_outlets: - datajob.outlets.append(outlet.urn) - - task.log.info(f"Emitting Datahub dataJob {datajob}") - datajob.emit(emitter, callback=_make_emit_callback(task.log)) - - if context["_datahub_config"].capture_executions: - dpi = AirflowGenerator.run_datajob( - emitter=emitter, - cluster=context["_datahub_config"].cluster, - ti=context["ti"], - dag=dag, - dag_run=context["dag_run"], - datajob=datajob, - start_timestamp_millis=int(ti.start_date.timestamp() * 1000), - ) - - task.log.info(f"Emitting Datahub Dataprocess Instance: {dpi}") - - emitter.flush() - - -def _wrap_pre_execution(pre_execution): - def custom_pre_execution(context): - config = get_lineage_config() - if config.enabled: - context["_datahub_config"] = config - datahub_pre_execution(context) - - # Call original policy - if pre_execution: - pre_execution(context) - - return custom_pre_execution - - -def _wrap_on_failure_callback(on_failure_callback): - def custom_on_failure_callback(context): - config = get_lineage_config() - if config.enabled: - context["_datahub_config"] = config - try: - datahub_task_status_callback(context, status=InstanceRunResult.FAILURE) - except Exception as e: - if not config.graceful_exceptions: - raise e - else: - print(f"Exception: {traceback.format_exc()}") - - # Call original policy - if on_failure_callback: - on_failure_callback(context) - - return custom_on_failure_callback - - -def _wrap_on_success_callback(on_success_callback): - def custom_on_success_callback(context): - config = get_lineage_config() - if config.enabled: - context["_datahub_config"] = config - try: - datahub_task_status_callback(context, status=InstanceRunResult.SUCCESS) - except Exception as e: - if not config.graceful_exceptions: - raise e - else: - print(f"Exception: {traceback.format_exc()}") - - # Call original policy - if on_success_callback: - on_success_callback(context) - - return custom_on_success_callback - - -def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: - task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}") - # task.add_inlets(["auto"]) - # task.pre_execute = _wrap_pre_execution(task.pre_execute) - - # MappedOperator's callbacks don't have setters until Airflow 2.X.X - # https://github.com/apache/airflow/issues/24547 - # We can bypass this by going through partial_kwargs for now - if MappedOperator and isinstance(task, MappedOperator): # type: ignore - on_failure_callback_prop: property = getattr( - MappedOperator, TASK_ON_FAILURE_CALLBACK - 
) - on_success_callback_prop: property = getattr( - MappedOperator, TASK_ON_SUCCESS_CALLBACK - ) - if not on_failure_callback_prop.fset or not on_success_callback_prop.fset: - task.log.debug( - "Using MappedOperator's partial_kwargs instead of callback properties" - ) - task.partial_kwargs[TASK_ON_FAILURE_CALLBACK] = _wrap_on_failure_callback( - task.on_failure_callback + if _USE_AIRFLOW_LISTENER_INTERFACE: + if HAS_AIRFLOW_DAG_LISTENER_API: + from datahub_airflow_plugin.datahub_listener import ( # type: ignore[misc] + get_airflow_plugin_listener, ) - task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback( - task.on_success_callback - ) - return - - task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback) # type: ignore - task.on_success_callback = _wrap_on_success_callback(task.on_success_callback) # type: ignore - # task.pre_execute = _wrap_pre_execution(task.pre_execute) - - -def _wrap_task_policy(policy): - if policy and hasattr(policy, "_task_policy_patched_by"): - return policy - - def custom_task_policy(task): - policy(task) - task_policy(task) - - # Add a flag to the policy to indicate that we've patched it. - custom_task_policy._task_policy_patched_by = "datahub_plugin" # type: ignore[attr-defined] - return custom_task_policy + listeners: list = list(filter(None, [get_airflow_plugin_listener()])) -def _patch_policy(settings): - if hasattr(settings, "task_policy"): - datahub_task_policy = _wrap_task_policy(settings.task_policy) - settings.task_policy = datahub_task_policy + else: + # On Airflow < 2.5, we need the listener to be a module. + # This is just a quick shim layer to make that work. + # The DAG listener API was added at the same time as this method + # was fixed, so we're reusing the same check variable. + # + # Related Airflow change: https://github.com/apache/airflow/pull/27113. + import datahub_airflow_plugin._datahub_listener_module as _listener_module # type: ignore[misc] + listeners = [_listener_module] -def _patch_datahub_policy(): - with contextlib.suppress(ImportError): - import airflow_local_settings - _patch_policy(airflow_local_settings) - - from airflow.models.dagbag import settings - - _patch_policy(settings) - - -_patch_datahub_policy() - - -class DatahubPlugin(AirflowPlugin): - name = "datahub_plugin" +if not _USE_AIRFLOW_LISTENER_INTERFACE: + # Use the policy patcher mechanism on Airflow 2.2 and below. 
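+    # The v1 path wraps task policies and the on_success/on_failure callbacks rather
+    # than using the listener API. It is also the path taken when the v2 dependencies
+    # are missing or when DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN is set, e.g.
+    # (hypothetical deployment snippet): export DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true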
+ import datahub_airflow_plugin.datahub_plugin_v22 # noqa: F401 diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py new file mode 100644 index 0000000000000..046fbb5efaa03 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -0,0 +1,336 @@ +import contextlib +import logging +import traceback +from typing import Any, Callable, Iterable, List, Optional, Union + +import airflow +from airflow.lineage import PIPELINE_OUTLETS +from airflow.models.baseoperator import BaseOperator +from airflow.utils.module_loading import import_string +from cattr import structure +from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult +from datahub.telemetry import telemetry + +from datahub_airflow_plugin._airflow_shims import ( + MappedOperator, + get_task_inlets, + get_task_outlets, +) +from datahub_airflow_plugin._config import get_lineage_config +from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator +from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook +from datahub_airflow_plugin.lineage.datahub import DatahubLineageConfig + +TASK_ON_FAILURE_CALLBACK = "on_failure_callback" +TASK_ON_SUCCESS_CALLBACK = "on_success_callback" + + +def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: + # TODO: Fix for https://github.com/apache/airflow/commit/1b1f3fabc5909a447a6277cafef3a0d4ef1f01ae + # in Airflow 2.4. + # TODO: ignore/handle airflow's dataset type in our lineage + + inlets: List[Any] = [] + task_inlets = get_task_inlets(task) + # From Airflow 2.3 this should be AbstractOperator but due to compatibility reason lets use BaseOperator + if isinstance(task_inlets, (str, BaseOperator)): + inlets = [ + task_inlets, + ] + + if task_inlets and isinstance(task_inlets, list): + inlets = [] + task_ids = ( + {o for o in task_inlets if isinstance(o, str)} + .union(op.task_id for op in task_inlets if isinstance(op, BaseOperator)) + .intersection(task.get_flat_relative_ids(upstream=True)) + ) + + from airflow.lineage import AUTO + + # pick up unique direct upstream task_ids if AUTO is specified + if AUTO.upper() in task_inlets or AUTO.lower() in task_inlets: + print("Picking up unique direct upstream task_ids as AUTO is specified") + task_ids = task_ids.union( + task_ids.symmetric_difference(task.upstream_task_ids) + ) + + inlets = task.xcom_pull( + context, task_ids=list(task_ids), dag_id=task.dag_id, key=PIPELINE_OUTLETS + ) + + # re-instantiate the obtained inlets + inlets = [ + structure(item["data"], import_string(item["type_name"])) + # _get_instance(structure(item, Metadata)) + for sublist in inlets + if sublist + for item in sublist + ] + + for inlet in task_inlets: + if not isinstance(inlet, str): + inlets.append(inlet) + + return inlets + + +def _make_emit_callback( + logger: logging.Logger, +) -> Callable[[Optional[Exception], str], None]: + def emit_callback(err: Optional[Exception], msg: str) -> None: + if err: + logger.error(f"Error sending metadata to datahub: {msg}", exc_info=err) + + return emit_callback + + +def datahub_task_status_callback(context, status): + ti = context["ti"] + task: "BaseOperator" = ti.task + dag = context["dag"] + config: DatahubLineageConfig = context["_datahub_config"] + + # This code is from the original airflow lineage code -> + # 
https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py + inlets = get_task_inlets_advanced(task, context) + + emitter = ( + DatahubGenericHook(config.datahub_conn_id).get_underlying_hook().make_emitter() + ) + + dataflow = AirflowGenerator.generate_dataflow( + cluster=config.cluster, + dag=dag, + capture_tags=config.capture_tags_info, + capture_owner=config.capture_ownership_info, + ) + task.log.info(f"Emitting Datahub Dataflow: {dataflow}") + dataflow.emit(emitter, callback=_make_emit_callback(task.log)) + + datajob = AirflowGenerator.generate_datajob( + cluster=config.cluster, + task=task, + dag=dag, + capture_tags=config.capture_tags_info, + capture_owner=config.capture_ownership_info, + ) + + for inlet in inlets: + datajob.inlets.append(inlet.urn) + + task_outlets = get_task_outlets(task) + for outlet in task_outlets: + datajob.outlets.append(outlet.urn) + + task.log.info(f"Emitting Datahub Datajob: {datajob}") + datajob.emit(emitter, callback=_make_emit_callback(task.log)) + + if config.capture_executions: + dpi = AirflowGenerator.run_datajob( + emitter=emitter, + cluster=config.cluster, + ti=ti, + dag=dag, + dag_run=context["dag_run"], + datajob=datajob, + start_timestamp_millis=int(ti.start_date.timestamp() * 1000), + ) + + task.log.info(f"Emitted Start Datahub Dataprocess Instance: {dpi}") + + dpi = AirflowGenerator.complete_datajob( + emitter=emitter, + cluster=config.cluster, + ti=ti, + dag_run=context["dag_run"], + result=status, + dag=dag, + datajob=datajob, + end_timestamp_millis=int(ti.end_date.timestamp() * 1000), + ) + task.log.info(f"Emitted Completed Data Process Instance: {dpi}") + + emitter.flush() + + +def datahub_pre_execution(context): + ti = context["ti"] + task: "BaseOperator" = ti.task + dag = context["dag"] + config: DatahubLineageConfig = context["_datahub_config"] + + task.log.info("Running Datahub pre_execute method") + + emitter = ( + DatahubGenericHook(config.datahub_conn_id).get_underlying_hook().make_emitter() + ) + + # This code is from the original airflow lineage code -> + # https://github.com/apache/airflow/blob/main/airflow/lineage/__init__.py + inlets = get_task_inlets_advanced(task, context) + + datajob = AirflowGenerator.generate_datajob( + cluster=config.cluster, + task=ti.task, + dag=dag, + capture_tags=config.capture_tags_info, + capture_owner=config.capture_ownership_info, + ) + + for inlet in inlets: + datajob.inlets.append(inlet.urn) + + task_outlets = get_task_outlets(task) + + for outlet in task_outlets: + datajob.outlets.append(outlet.urn) + + task.log.info(f"Emitting Datahub dataJob {datajob}") + datajob.emit(emitter, callback=_make_emit_callback(task.log)) + + if config.capture_executions: + dpi = AirflowGenerator.run_datajob( + emitter=emitter, + cluster=config.cluster, + ti=ti, + dag=dag, + dag_run=context["dag_run"], + datajob=datajob, + start_timestamp_millis=int(ti.start_date.timestamp() * 1000), + ) + + task.log.info(f"Emitting Datahub Dataprocess Instance: {dpi}") + + emitter.flush() + + +def _wrap_pre_execution(pre_execution): + def custom_pre_execution(context): + config = get_lineage_config() + if config.enabled: + context["_datahub_config"] = config + datahub_pre_execution(context) + + # Call original policy + if pre_execution: + pre_execution(context) + + return custom_pre_execution + + +def _wrap_on_failure_callback(on_failure_callback): + def custom_on_failure_callback(context): + config = get_lineage_config() + if config.enabled: + context["_datahub_config"] = config + try: + 
datahub_task_status_callback(context, status=InstanceRunResult.FAILURE) + except Exception as e: + if not config.graceful_exceptions: + raise e + else: + print(f"Exception: {traceback.format_exc()}") + + # Call original policy + if on_failure_callback: + on_failure_callback(context) + + return custom_on_failure_callback + + +def _wrap_on_success_callback(on_success_callback): + def custom_on_success_callback(context): + config = get_lineage_config() + if config.enabled: + context["_datahub_config"] = config + try: + datahub_task_status_callback(context, status=InstanceRunResult.SUCCESS) + except Exception as e: + if not config.graceful_exceptions: + raise e + else: + print(f"Exception: {traceback.format_exc()}") + + # Call original policy + if on_success_callback: + on_success_callback(context) + + return custom_on_success_callback + + +def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: + task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}") + # task.add_inlets(["auto"]) + # task.pre_execute = _wrap_pre_execution(task.pre_execute) + + # MappedOperator's callbacks don't have setters until Airflow 2.X.X + # https://github.com/apache/airflow/issues/24547 + # We can bypass this by going through partial_kwargs for now + if MappedOperator and isinstance(task, MappedOperator): # type: ignore + on_failure_callback_prop: property = getattr( + MappedOperator, TASK_ON_FAILURE_CALLBACK + ) + on_success_callback_prop: property = getattr( + MappedOperator, TASK_ON_SUCCESS_CALLBACK + ) + if not on_failure_callback_prop.fset or not on_success_callback_prop.fset: + task.log.debug( + "Using MappedOperator's partial_kwargs instead of callback properties" + ) + task.partial_kwargs[TASK_ON_FAILURE_CALLBACK] = _wrap_on_failure_callback( + task.on_failure_callback + ) + task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback( + task.on_success_callback + ) + return + + task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback) # type: ignore + task.on_success_callback = _wrap_on_success_callback(task.on_success_callback) # type: ignore + # task.pre_execute = _wrap_pre_execution(task.pre_execute) + + +def _wrap_task_policy(policy): + if policy and hasattr(policy, "_task_policy_patched_by"): + return policy + + def custom_task_policy(task): + policy(task) + task_policy(task) + + # Add a flag to the policy to indicate that we've patched it. 
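+    # _wrap_task_policy checks for this attribute before wrapping, so running
+    # _patch_policy on both airflow_local_settings and the dagbag settings module
+    # cannot wrap the same policy twice.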
+ custom_task_policy._task_policy_patched_by = "datahub_plugin" # type: ignore[attr-defined] + return custom_task_policy + + +def _patch_policy(settings): + if hasattr(settings, "task_policy"): + datahub_task_policy = _wrap_task_policy(settings.task_policy) + settings.task_policy = datahub_task_policy + + +def _patch_datahub_policy(): + with contextlib.suppress(ImportError): + import airflow_local_settings + + _patch_policy(airflow_local_settings) + + from airflow.models.dagbag import settings + + _patch_policy(settings) + + plugin_config = get_lineage_config() + telemetry.telemetry_instance.ping( + "airflow-plugin-init", + { + "airflow-version": airflow.__version__, + "datahub-airflow-plugin": "v1", + "capture_executions": plugin_config.capture_executions, + "capture_tags": plugin_config.capture_tags_info, + "capture_ownership": plugin_config.capture_ownership_info, + }, + ) + + +_patch_datahub_policy() diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py index f40295c6bb883..0d7cdb6b6e90a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py @@ -2,12 +2,11 @@ This example demonstrates how to emit lineage to DataHub within an Airflow DAG. """ - from datetime import timedelta import datahub.emitter.mce_builder as builder from airflow import DAG -from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator +from airflow.operators.bash import BashOperator from airflow.utils.dates import days_ago from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator @@ -33,23 +32,10 @@ catchup=False, default_view="tree", ) as dag: - # This example shows a SnowflakeOperator followed by a lineage emission. However, the - # same DatahubEmitterOperator can be used to emit lineage in any context. 
- - sql = """CREATE OR REPLACE TABLE `mydb.schema.tableC` AS - WITH some_table AS ( - SELECT * FROM `mydb.schema.tableA` - ), - some_other_table AS ( - SELECT id, some_column FROM `mydb.schema.tableB` - ) - SELECT * FROM some_table - LEFT JOIN some_other_table ON some_table.unique_id=some_other_table.id""" - transformation_task = SnowflakeOperator( - task_id="snowflake_transformation", + transformation_task = BashOperator( + task_id="transformation_task", dag=dag, - snowflake_conn_id="snowflake_default", - sql=sql, + bash_command="echo 'This is where you might run your data tooling.'", ) emit_lineage_task = DatahubEmitterOperator( diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index 8fb7363f8cad1..9604931795ccb 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -1,7 +1,9 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from datahub.emitter.generic_emitter import Emitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( MetadataChangeEvent, MetadataChangeProposal, @@ -11,6 +13,7 @@ from airflow.models.connection import Connection from datahub.emitter.kafka_emitter import DatahubKafkaEmitter from datahub.emitter.rest_emitter import DatahubRestEmitter + from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter from datahub.ingestion.sink.datahub_kafka import KafkaSinkConfig @@ -80,17 +83,24 @@ def make_emitter(self) -> "DatahubRestEmitter": return datahub.emitter.rest_emitter.DatahubRestEmitter(*self._get_config()) - def emit_mces(self, mces: List[MetadataChangeEvent]) -> None: + def emit( + self, + items: Sequence[ + Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ] + ], + ) -> None: emitter = self.make_emitter() - for mce in mces: - emitter.emit_mce(mce) + for item in items: + emitter.emit(item) - def emit_mcps(self, mcps: List[MetadataChangeProposal]) -> None: - emitter = self.make_emitter() - - for mce in mcps: - emitter.emit_mcp(mce) + # Retained for backwards compatibility. + emit_mces = emit + emit_mcps = emit class DatahubKafkaHook(BaseHook): @@ -152,7 +162,16 @@ def make_emitter(self) -> "DatahubKafkaEmitter": sink_config = self._get_config() return datahub.emitter.kafka_emitter.DatahubKafkaEmitter(sink_config) - def emit_mces(self, mces: List[MetadataChangeEvent]) -> None: + def emit( + self, + items: Sequence[ + Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ] + ], + ) -> None: emitter = self.make_emitter() errors = [] @@ -160,29 +179,50 @@ def callback(exc, msg): if exc: errors.append(exc) - for mce in mces: - emitter.emit_mce_async(mce, callback) + for mce in items: + emitter.emit(mce, callback) emitter.flush() if errors: - raise AirflowException(f"failed to push some MCEs: {errors}") + raise AirflowException(f"failed to push some metadata: {errors}") - def emit_mcps(self, mcps: List[MetadataChangeProposal]) -> None: - emitter = self.make_emitter() - errors = [] + # Retained for backwards compatibility. 
+ emit_mces = emit + emit_mcps = emit - def callback(exc, msg): - if exc: - errors.append(exc) - for mcp in mcps: - emitter.emit_mcp_async(mcp, callback) +class SynchronizedFileHook(BaseHook): + conn_type = "datahub-file" - emitter.flush() + def __init__(self, datahub_conn_id: str) -> None: + super().__init__() + self.datahub_conn_id = datahub_conn_id - if errors: - raise AirflowException(f"failed to push some MCPs: {errors}") + def make_emitter(self) -> "SynchronizedFileEmitter": + from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter + + conn = self.get_connection(self.datahub_conn_id) + filename = conn.host + if not filename: + raise AirflowException("filename parameter is required") + + return SynchronizedFileEmitter(filename=filename) + + def emit( + self, + items: Sequence[ + Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ] + ], + ) -> None: + emitter = self.make_emitter() + + for item in items: + emitter.emit(item) class DatahubGenericHook(BaseHook): @@ -198,7 +238,9 @@ def __init__(self, datahub_conn_id: str) -> None: super().__init__() self.datahub_conn_id = datahub_conn_id - def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]: + def get_underlying_hook( + self, + ) -> Union[DatahubRestHook, DatahubKafkaHook, SynchronizedFileHook]: conn = self.get_connection(self.datahub_conn_id) # We need to figure out the underlying hook type. First check the @@ -213,6 +255,11 @@ def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]: or conn.conn_type == DatahubKafkaHook.conn_type.replace("-", "_") ): return DatahubKafkaHook(self.datahub_conn_id) + elif ( + conn.conn_type == SynchronizedFileHook.conn_type + or conn.conn_type == SynchronizedFileHook.conn_type.replace("-", "_") + ): + return SynchronizedFileHook(self.datahub_conn_id) elif "rest" in self.datahub_conn_id: return DatahubRestHook(self.datahub_conn_id) elif "kafka" in self.datahub_conn_id: @@ -222,8 +269,20 @@ def get_underlying_hook(self) -> Union[DatahubRestHook, DatahubKafkaHook]: f"DataHub cannot handle conn_type {conn.conn_type} in {conn}" ) - def make_emitter(self) -> Union["DatahubRestEmitter", "DatahubKafkaEmitter"]: + def make_emitter(self) -> Emitter: return self.get_underlying_hook().make_emitter() - def emit_mces(self, mces: List[MetadataChangeEvent]) -> None: - return self.get_underlying_hook().emit_mces(mces) + def emit( + self, + items: Sequence[ + Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ] + ], + ) -> None: + return self.get_underlying_hook().emit(items) + + # Retained for backwards compatibility. 
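+    # Existing callers of emit_mces() keep working; newer code such as
+    # DatahubEmitterOperator invokes emit() directly.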
+ emit_mces = emit diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py similarity index 72% rename from metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py rename to metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py index d91c039ffa718..f5f519fa23b11 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_lineage_core.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py @@ -1,11 +1,10 @@ from datetime import datetime from typing import TYPE_CHECKING, Dict, List -import datahub.emitter.mce_builder as builder from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult -from datahub.configuration.common import ConfigModel from datahub.utilities.urns.dataset_urn import DatasetUrn +from datahub_airflow_plugin._config import DatahubLineageConfig from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator from datahub_airflow_plugin.entities import _Entity @@ -15,39 +14,14 @@ from airflow.models.taskinstance import TaskInstance from datahub_airflow_plugin._airflow_shims import Operator - from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook def _entities_to_urn_list(iolets: List[_Entity]) -> List[DatasetUrn]: return [DatasetUrn.create_from_string(let.urn) for let in iolets] -class DatahubBasicLineageConfig(ConfigModel): - enabled: bool = True - - # DataHub hook connection ID. - datahub_conn_id: str - - # Cluster to associate with the pipelines and tasks. Defaults to "prod". - cluster: str = builder.DEFAULT_FLOW_CLUSTER - - # If true, the owners field of the DAG will be capture as a DataHub corpuser. - capture_ownership_info: bool = True - - # If true, the tags field of the DAG will be captured as DataHub tags. - capture_tags_info: bool = True - - capture_executions: bool = False - - def make_emitter_hook(self) -> "DatahubGenericHook": - # This is necessary to avoid issues with circular imports. - from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook - - return DatahubGenericHook(self.datahub_conn_id) - - def send_lineage_to_datahub( - config: DatahubBasicLineageConfig, + config: DatahubLineageConfig, operator: "Operator", inlets: List[_Entity], outlets: List[_Entity], diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py index c41bb2b2a1e37..3ebe7831d08f9 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py @@ -4,8 +4,8 @@ from airflow.configuration import conf from airflow.lineage.backend import LineageBackend -from datahub_airflow_plugin._lineage_core import ( - DatahubBasicLineageConfig, +from datahub_airflow_plugin.lineage._lineage_core import ( + DatahubLineageConfig, send_lineage_to_datahub, ) @@ -13,14 +13,7 @@ from airflow.models.baseoperator import BaseOperator -class DatahubLineageConfig(DatahubBasicLineageConfig): - # If set to true, most runtime errors in the lineage backend will be - # suppressed and will not cause the overall task to fail. Note that - # configuration issues will still throw exceptions. 
- graceful_exceptions: bool = True - - -def get_lineage_config() -> DatahubLineageConfig: +def get_lineage_backend_config() -> DatahubLineageConfig: """Load the lineage config from airflow.cfg.""" # The kwargs pattern is also used for secret backends. @@ -51,8 +44,7 @@ class DatahubLineageBackend(LineageBackend): datahub_kwargs = { "datahub_conn_id": "datahub_rest_default", "capture_ownership_info": true, - "capture_tags_info": true, - "graceful_exceptions": true } + "capture_tags_info": true } # The above indentation is important! """ @@ -61,7 +53,7 @@ def __init__(self) -> None: # By attempting to get and parse the config, we can detect configuration errors # ahead of time. The init method is only called in Airflow 2.x. - _ = get_lineage_config() + _ = get_lineage_backend_config() # With Airflow 2.0, this can be an instance method. However, with Airflow 1.10.x, this # method is used statically, even though LineageBackend declares it as an instance variable. @@ -72,7 +64,7 @@ def send_lineage( outlets: Optional[List] = None, # unused context: Optional[Dict] = None, ) -> None: - config = get_lineage_config() + config = get_lineage_backend_config() if not config.enabled: return @@ -82,10 +74,4 @@ def send_lineage( config, operator, operator.inlets, operator.outlets, context ) except Exception as e: - if config.graceful_exceptions: - operator.log.error(e) - operator.log.info( - "Suppressing error because graceful_exceptions is set" - ) - else: - raise + operator.log.error(e) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py index 109e7ddfe4dfa..15b50c51a561d 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py @@ -57,7 +57,7 @@ def __init__( # type: ignore[no-untyped-def] datahub_conn_id=datahub_conn_id, **kwargs, ) - self.mces = mces + self.metadata = mces def execute(self, context): - self.generic_hook.get_underlying_hook().emit_mces(self.mces) + self.generic_hook.get_underlying_hook().emit(self.metadata) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py new file mode 100644 index 0000000000000..d2c45e723f1b0 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py @@ -0,0 +1,6 @@ +def pytest_addoption(parser): + parser.addoption( + "--update-golden-files", + action="store_true", + default=False, + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py new file mode 100644 index 0000000000000..8b0803ab98422 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py @@ -0,0 +1,34 @@ +from datetime import datetime + +from airflow import DAG +from airflow.operators.bash import BashOperator + +from datahub_airflow_plugin.entities import Dataset, Urn + +with DAG( + "basic_iolets", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + task = BashOperator( + task_id="run_data_task", + dag=dag, + bash_command="echo 'This is where you might run your data tooling.'", + inlets=[ + Dataset(platform="snowflake", name="mydb.schema.tableA"), + Dataset(platform="snowflake", name="mydb.schema.tableB", env="DEV"), + 
Dataset( + platform="snowflake", + name="mydb.schema.tableC", + platform_instance="cloud", + ), + Urn( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ), + ], + outlets=[ + Dataset("snowflake", "mydb.schema.tableD"), + Dataset("snowflake", "mydb.schema.tableE"), + ], + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py new file mode 100644 index 0000000000000..1dd047f0a6dcc --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py @@ -0,0 +1,34 @@ +from datetime import datetime + +from airflow import DAG +from airflow.operators.bash import BashOperator + +from datahub_airflow_plugin.entities import Dataset, Urn + +with DAG( + "simple_dag", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, + description="A simple DAG that runs a few fake data tasks.", +) as dag: + task1 = BashOperator( + task_id="task_1", + dag=dag, + bash_command="echo 'task 1'", + inlets=[ + Dataset(platform="snowflake", name="mydb.schema.tableA"), + Urn( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ), + ], + outlets=[Dataset("snowflake", "mydb.schema.tableD")], + ) + + task2 = BashOperator( + task_id="run_another_data_task", + dag=dag, + bash_command="echo 'task 2'", + ) + + task1 >> task2 diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py new file mode 100644 index 0000000000000..347d0f88b0cd0 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py @@ -0,0 +1,32 @@ +from datetime import datetime + +from airflow import DAG +from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator + +SNOWFLAKE_COST_TABLE = "costs" +SNOWFLAKE_PROCESSED_TABLE = "processed_costs" + +with DAG( + "snowflake_operator", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + transform_cost_table = SnowflakeOperator( + snowflake_conn_id="my_snowflake", + task_id="transform_cost_table", + sql=""" + CREATE OR REPLACE TABLE {{ params.out_table_name }} AS + SELECT + id, + month, + total_cost, + area, + total_cost / area as cost_per_area + FROM {{ params.in_table_name }} + """, + params={ + "in_table_name": SNOWFLAKE_COST_TABLE, + "out_table_name": SNOWFLAKE_PROCESSED_TABLE, + }, + ) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py new file mode 100644 index 0000000000000..77faec3c8935a --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py @@ -0,0 +1,75 @@ +from datetime import datetime + +from airflow import DAG +from airflow.providers.sqlite.operators.sqlite import SqliteOperator + +CONN_ID = "my_sqlite" + +COST_TABLE = "costs" +PROCESSED_TABLE = "processed_costs" + +with DAG( + "sqlite_operator", + start_date=datetime(2023, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + create_cost_table = SqliteOperator( + sqlite_conn_id=CONN_ID, + task_id="create_cost_table", + sql=""" + CREATE TABLE IF NOT EXISTS {{ params.table_name }} ( + id INTEGER PRIMARY KEY, + month TEXT NOT NULL, + total_cost REAL NOT NULL, + area REAL NOT NULL + ) + """, + params={"table_name": 
COST_TABLE}, + ) + + populate_cost_table = SqliteOperator( + sqlite_conn_id=CONN_ID, + task_id="populate_cost_table", + sql=""" + INSERT INTO {{ params.table_name }} (id, month, total_cost, area) + VALUES + (1, '2021-01', 100, 10), + (2, '2021-02', 200, 20), + (3, '2021-03', 300, 30) + """, + params={"table_name": COST_TABLE}, + ) + + transform_cost_table = SqliteOperator( + sqlite_conn_id=CONN_ID, + task_id="transform_cost_table", + sql=""" + CREATE TABLE IF NOT EXISTS {{ params.out_table_name }} AS + SELECT + id, + month, + total_cost, + area, + total_cost / area as cost_per_area + FROM {{ params.in_table_name }} + """, + params={ + "in_table_name": COST_TABLE, + "out_table_name": PROCESSED_TABLE, + }, + ) + + cleanup_tables = [] + for table_name in [COST_TABLE, PROCESSED_TABLE]: + cleanup_table = SqliteOperator( + sqlite_conn_id=CONN_ID, + task_id=f"cleanup_{table_name}", + sql=""" + DROP TABLE {{ params.table_name }} + """, + params={"table_name": table_name}, + ) + cleanup_tables.append(cleanup_table) + + create_cost_table >> populate_cost_table >> transform_cost_table >> cleanup_tables diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json new file mode 100644 index 0000000000000..26aa2afaa831a --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json @@ -0,0 +1,533 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "None", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", + "name": "basic_iolets" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "0.176536", + "start_date": "2023-09-30 00:49:56.670239+00:00", + "end_date": "2023-09-30 00:49:56.846775+00:00", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "1", + "max_tries": "0", + "external_executor_id": "None", + "state": "success", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets" + }, + "externalUrl": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets", + "name": "basic_iolets_run_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696034996670, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034996670, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 2 + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": 
"dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034996846, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json new file mode 100644 index 0000000000000..b2e3a1fe47da7 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json @@ -0,0 +1,718 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "None", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", + "name": "simple_dag", + "description": "A simple DAG that runs a few fake data tasks." + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": 
"globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "0.175983", + "start_date": "2023-09-30 00:48:58.943850+00:00", + "end_date": "2023-09-30 00:48:59.119833+00:00", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "1", + "max_tries": "0", + "external_executor_id": "None", + "state": "success", + "operator": "BashOperator", + "priority_weight": "2", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag", + "name": "simple_dag_task_1_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696034938943, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034938943, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 2 + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034939119, + "partitionSpec": { + "type": "FULL_TABLE", + 
"partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "None", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", + "name": "simple_dag", + "description": "A simple DAG that runs a few fake data tasks." + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": 
"None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "'all_success'", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "0.129888", + "start_date": "2023-09-30 00:49:02.158752+00:00", + "end_date": "2023-09-30 00:49:02.288640+00:00", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "1", + "max_tries": "0", + "external_executor_id": "None", + "state": "success", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag", + "name": "simple_dag_run_another_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696034942158, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034942158, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 2 + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + 
"aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696034942288, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json new file mode 100644 index 0000000000000..2e733c2ad40a9 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json @@ -0,0 +1,535 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", + "name": "basic_iolets" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", 
\"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": 
false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 01:13:14.266272+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "name": "basic_iolets_run_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696036394266, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + 
"entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696036394266, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", 
\"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } 
+}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696036394833, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json new file mode 100644 index 0000000000000..44b288efda954 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json @@ -0,0 +1,535 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", + "name": "basic_iolets" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": 
"urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,basic_iolets,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": 
\"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": 
"urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:59:52.401211+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "name": "basic_iolets_run_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057192401, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057192401, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"task_id\": \"run_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'This is where you might run your data tooling.'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"env\": \"DEV\", \"name\": \"mydb.schema.tableB\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableC\", \"platform\": \"snowflake\", \"platform_instance\": \"cloud\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": 
\"mydb.schema.tableD\", \"platform\": \"snowflake\"}, {\"env\": \"PROD\", \"name\": \"mydb.schema.tableE\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", + "name": "run_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,cloud.mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableB,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:5d666eaf9015a31b3e305e8bc2dba078", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057192982, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json new file mode 100644 index 0000000000000..454c509279e11 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json @@ -0,0 +1,666 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", + "name": "simple_dag", + "description": "A simple DAG that runs a few fake data tasks." + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 
1'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": 
"globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:53:58.219003+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "2", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "name": "simple_dag_task_1_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056838219, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056838219, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + 
"execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056838648, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": 
"http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:54:02.407515+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "name": "simple_dag_run_another_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056842407, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056842407, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "", + "wait_for_downstream": 
"False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_lock_for_execution\": true, \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_on_exit_code\": [99], \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056842831, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json new 
file mode 100644 index 0000000000000..73b5765e96b7d --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json @@ -0,0 +1,722 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", + "name": "simple_dag", + "description": "A simple DAG that runs a few fake data tasks." + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": 
\"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:58:56.105026+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "2", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + }, + "externalUrl": 
"http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "name": "simple_dag_task_1_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057136105, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057136105, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'task_1'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'task_1'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['run_another_data_task']", + "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)')]", + "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", 
\"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 1'\", \"dag\": \"<>\", \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"task_id\": \"task_1\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 1'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [\"run_another_data_task\"], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableA\", \"platform\": \"snowflake\"}, {\"_urn\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)\"}], \"outlets\": [{\"env\": \"PROD\", \"name\": \"mydb.schema.tableD\", \"platform\": \"snowflake\"}], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", + "name": "task_1", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableA,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", + 
"changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057136612, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", + "name": "simple_dag", + "description": "A simple DAG that runs a few fake data tasks." + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": 
\"<>\", \"task_group\": \"<>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:58:59.567004+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "BashOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "name": "simple_dag_run_another_data_task_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057139567, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057139567, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { 
+ "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'run_another_data_task'", + "execution_timeout": "None", + "sla": "None", + "task_id": "'run_another_data_task'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"_BaseOperator__from_mapped\": false, \"_BaseOperator__init_kwargs\": {\"bash_command\": \"echo 'task 2'\", \"dag\": \"<>\", \"task_id\": \"run_another_data_task\"}, \"_BaseOperator__instantiated\": true, \"_dag\": \"<>\", \"_log\": \"<>\", \"append_env\": false, \"bash_command\": \"echo 'task 2'\", \"depends_on_past\": false, \"do_xcom_push\": true, \"downstream_task_ids\": [], \"email_on_failure\": true, \"email_on_retry\": true, \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"inlets\": [], \"outlets\": [], \"output_encoding\": \"utf-8\", \"owner\": \"airflow\", \"params\": \"<>\", \"pool\": \"default_pool\", \"pool_slots\": 1, \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_delay\": \"<>\", \"retry_exponential_backoff\": false, \"skip_exit_code\": 99, \"start_date\": \"<>\", \"task_group\": \"<>\", \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": [\"task_1\"], \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", + "name": "run_another_data_task", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057140164, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git 
a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json new file mode 100644 index 0000000000000..affc395d421da --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json @@ -0,0 +1,507 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=snowflake_operator", + "name": "snowflake_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,snowflake_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)" + ], + "inputDatajobs": [], + 
"fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": 
"urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:55:36.844976+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SnowflakeOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1", + "name": "snowflake_operator_transform_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056936844, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056936844, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area 
as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:3161034cc84e16a7c5e1906225734747", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056938096, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "FAILURE", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json new file mode 100644 index 0000000000000..1a32b38ce055d --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -0,0 +1,1735 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + 
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'create_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n '", + "task_id": "'create_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['populate_cost_table']", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", + "name": "create_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": 
"urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:56:24.632190+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "5", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_create_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056984632, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056984632, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'create_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n '", + "task_id": "'create_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['populate_cost_table']", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT 
NULL,\\n area REAL NOT NULL\\n )\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", + "name": "create_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056984947, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'populate_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "\"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"", + "task_id": "'populate_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['transform_cost_table']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": 
\"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", + "name": "populate_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:56:28.605901+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "4", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_populate_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056988605, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + 
"changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056988605, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'populate_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "\"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"", + "task_id": "'populate_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['transform_cost_table']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", + "name": "populate_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + 
"entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056989098, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:56:32.888165+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "3", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_transform_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056992888, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + 
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056992888, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + 
"upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056993744, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE costs\\n '", + "task_id": "'cleanup_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", + "name": "cleanup_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], 
+ "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:56:37.745717+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_cleanup_costs_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696056997745, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056997745, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": 
"dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE costs\\n '", + "task_id": "'cleanup_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", + "name": "cleanup_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696056998672, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } 
+ } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_processed_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE processed_costs\\n '", + "task_id": "'cleanup_processed_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", + "name": "cleanup_processed_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", 
+ "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 06:56:42.645806+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057002645, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057002645, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_processed_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE processed_costs\\n '", + "task_id": "'cleanup_processed_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": 
\"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", + "name": "cleanup_processed_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057003759, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json new file mode 100644 index 0000000000000..c082be693e30c --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -0,0 +1,1955 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + 
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'create_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n '", + "task_id": "'create_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['populate_cost_table']", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", + "name": "create_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", 
+ "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 07:00:45.832554+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "5", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_create_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057245832, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057245832, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'create_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS 
costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n '", + "task_id": "'create_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['populate_cost_table']", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", + "name": "create_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057246734, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": 
"dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'populate_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "\"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"", + "task_id": "'populate_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['transform_cost_table']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", + "name": "populate_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + 
"owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 07:00:49.653938+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "4", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_populate_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057249653, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057249653, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'populate_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "\"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"", + "task_id": "'populate_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['transform_cost_table']", + "inlets": "[]", + "outlets": "[]", + 
"openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", + "name": "populate_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057250831, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", 
+ "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 07:00:53.989264+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "3", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_transform_cost_table_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057253989, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + 
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057253989, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 
1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + 
"aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057255628, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE costs\\n '", + "task_id": "'cleanup_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": 
\"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", + "name": "cleanup_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 07:01:00.421177+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_cleanup_costs_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057260421, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "upstreamInstances": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": 
"UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057260421, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE costs\\n '", + "task_id": "'cleanup_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", + "name": "cleanup_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + 
"changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057262258, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": { + "_access_control": "None", + "catchup": "False", + "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", + "is_paused_upon_creation": "None", + "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", + "tags": "[]", + "timezone": "Timezone('UTC')" + }, + "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", + "name": "sqlite_operator" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_processed_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE processed_costs\\n '", + "task_id": "'cleanup_processed_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", 
\"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", + "name": "cleanup_processed_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "run_id": "manual_run_test", + "duration": "None", + "start_date": "2023-09-30 07:01:05.540192+00:00", + "end_date": "None", + "execution_date": "2023-09-27 21:34:38+00:00", + "try_number": "0", + "max_tries": "0", + "external_executor_id": "None", + "state": "running", + "operator": "SqliteOperator", + "priority_weight": "1", + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + }, + "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", + "type": "BATCH_AD_HOC", + "created": { + "time": 1696057265540, + "actor": "urn:li:corpuser:datahub" + } + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "upstreamInstances": [] + } + } +}, +{ 
+ "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057265540, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_processed_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE processed_costs\\n '", + "task_id": "'cleanup_processed_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", + "name": "cleanup_processed_costs", + "type": { + "string": "COMMAND" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "outputDatasets": [], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:airflow", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:airflow" + } + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696057267631, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "airflow" + } + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py deleted file mode 100644 index 10cf3ad0a608a..0000000000000 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_dummy(): - pass diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py new file mode 100644 index 0000000000000..a2b7fd151a1e4 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -0,0 +1,392 @@ +import contextlib +import dataclasses +import functools +import logging +import os +import pathlib +import random +import signal +import subprocess +import time +from typing import Iterator, Sequence + +import pytest +import requests +import tenacity +from airflow.models.connection import Connection +from datahub.testing.compare_metadata_json import assert_metadata_files_equal + +from datahub_airflow_plugin._airflow_shims import ( + HAS_AIRFLOW_DAG_LISTENER_API, + HAS_AIRFLOW_LISTENER_API, + HAS_AIRFLOW_STANDALONE_CMD, +) + +pytestmark = pytest.mark.integration + +logger = logging.getLogger(__name__) +IS_LOCAL = os.environ.get("CI", "false") == "false" + +DAGS_FOLDER = pathlib.Path(__file__).parent / "dags" +GOLDENS_FOLDER = pathlib.Path(__file__).parent / "goldens" + + +@dataclasses.dataclass +class AirflowInstance: + airflow_home: pathlib.Path + airflow_port: int + pid: int + env_vars: dict + + username: str + password: str + + metadata_file: pathlib.Path + + @property + def airflow_url(self) -> str: + return f"http://localhost:{self.airflow_port}" + + @functools.cached_property + def session(self) -> requests.Session: + session = requests.Session() + session.auth = (self.username, self.password) + return session + + +@tenacity.retry( + reraise=True, + wait=tenacity.wait_fixed(1), + stop=tenacity.stop_after_delay(60), + retry=tenacity.retry_if_exception_type( + (AssertionError, requests.exceptions.RequestException) + ), +) +def 
_wait_for_airflow_healthy(airflow_port: int) -> None: + print("Checking if Airflow is ready...") + res = requests.get(f"http://localhost:{airflow_port}/health", timeout=5) + res.raise_for_status() + + airflow_health = res.json() + assert airflow_health["metadatabase"]["status"] == "healthy" + assert airflow_health["scheduler"]["status"] == "healthy" + + +class NotReadyError(Exception): + pass + + +@tenacity.retry( + reraise=True, + wait=tenacity.wait_fixed(1), + stop=tenacity.stop_after_delay(90), + retry=tenacity.retry_if_exception_type(NotReadyError), +) +def _wait_for_dag_finish( + airflow_instance: AirflowInstance, dag_id: str, require_success: bool +) -> None: + print("Checking if DAG is finished") + res = airflow_instance.session.get( + f"{airflow_instance.airflow_url}/api/v1/dags/{dag_id}/dagRuns", timeout=5 + ) + res.raise_for_status() + + dag_runs = res.json()["dag_runs"] + if not dag_runs: + raise NotReadyError("No DAG runs found") + + dag_run = dag_runs[0] + if dag_run["state"] == "failed": + if require_success: + raise ValueError("DAG failed") + # else - success is not required, so we're done. + + elif dag_run["state"] != "success": + raise NotReadyError(f"DAG has not finished yet: {dag_run['state']}") + + +@contextlib.contextmanager +def _run_airflow( + tmp_path: pathlib.Path, dags_folder: pathlib.Path, is_v1: bool +) -> Iterator[AirflowInstance]: + airflow_home = tmp_path / "airflow_home" + print(f"Using airflow home: {airflow_home}") + + if IS_LOCAL: + airflow_port = 11792 + else: + airflow_port = random.randint(10000, 12000) + print(f"Using airflow port: {airflow_port}") + + datahub_connection_name = "datahub_file_default" + meta_file = tmp_path / "datahub_metadata.json" + + environment = { + **os.environ, + "AIRFLOW_HOME": str(airflow_home), + "AIRFLOW__WEBSERVER__WEB_SERVER_PORT": str(airflow_port), + "AIRFLOW__WEBSERVER__BASE_URL": "http://airflow.example.com", + # Point airflow to the DAGs folder. + "AIRFLOW__CORE__LOAD_EXAMPLES": "False", + "AIRFLOW__CORE__DAGS_FOLDER": str(dags_folder), + "AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION": "False", + # Have the Airflow API use username/password authentication. + "AIRFLOW__API__AUTH_BACKEND": "airflow.api.auth.backend.basic_auth", + # Configure the datahub plugin and have it write the MCPs to a file. + "AIRFLOW__CORE__LAZY_LOAD_PLUGINS": "False" if is_v1 else "True", + "AIRFLOW__DATAHUB__CONN_ID": datahub_connection_name, + f"AIRFLOW_CONN_{datahub_connection_name.upper()}": Connection( + conn_id="datahub_file_default", + conn_type="datahub-file", + host=str(meta_file), + ).get_uri(), + # Configure fake credentials for the Snowflake connection. + "AIRFLOW_CONN_MY_SNOWFLAKE": Connection( + conn_id="my_snowflake", + conn_type="snowflake", + login="fake_username", + password="fake_password", + schema="DATAHUB_TEST_SCHEMA", + extra={ + "account": "fake_account", + "database": "DATAHUB_TEST_DATABASE", + "warehouse": "fake_warehouse", + "role": "fake_role", + "insecure_mode": "true", + }, + ).get_uri(), + "AIRFLOW_CONN_MY_SQLITE": Connection( + conn_id="my_sqlite", + conn_type="sqlite", + host=str(tmp_path / "my_sqlite.db"), + ).get_uri(), + # Convenience settings. + "AIRFLOW__DATAHUB__LOG_LEVEL": "DEBUG", + "AIRFLOW__DATAHUB__DEBUG_EMITTER": "True", + "SQLALCHEMY_SILENCE_UBER_WARNING": "1", + } + + if not HAS_AIRFLOW_STANDALONE_CMD: + raise pytest.skip("Airflow standalone command is not available") + + # Start airflow in a background subprocess. 
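+    # `airflow standalone` brings up the webserver and scheduler together and
+    # writes the generated admin password to a file, which is read further
+    # below once the health check succeeds.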
+ airflow_process = subprocess.Popen( + ["airflow", "standalone"], + env=environment, + ) + + try: + _wait_for_airflow_healthy(airflow_port) + print("Airflow is ready!") + + # Sleep for a few seconds to make sure the other Airflow processes are ready. + time.sleep(3) + + # Create an extra "airflow" user for easy testing. + if IS_LOCAL: + print("Creating an extra test user...") + subprocess.check_call( + [ + # fmt: off + "airflow", "users", "create", + "--username", "airflow", + "--password", "airflow", + "--firstname", "admin", + "--lastname", "admin", + "--role", "Admin", + "--email", "airflow@example.com", + # fmt: on + ], + env=environment, + ) + + # Sanity check that the plugin got loaded. + if not is_v1: + print("[debug] Listing loaded plugins") + subprocess.check_call( + ["airflow", "plugins", "-v"], + env=environment, + ) + + # Load the admin user's password. This is generated by the + # `airflow standalone` command, and is different from the + # airflow user that we create when running locally. + airflow_username = "admin" + airflow_password = (airflow_home / "standalone_admin_password.txt").read_text() + + airflow_instance = AirflowInstance( + airflow_home=airflow_home, + airflow_port=airflow_port, + pid=airflow_process.pid, + env_vars=environment, + username=airflow_username, + password=airflow_password, + metadata_file=meta_file, + ) + + yield airflow_instance + finally: + try: + # Attempt a graceful shutdown. + print("Shutting down airflow...") + airflow_process.send_signal(signal.SIGINT) + airflow_process.wait(timeout=30) + except subprocess.TimeoutExpired: + # If the graceful shutdown failed, kill the process. + print("Hard shutting down airflow...") + airflow_process.kill() + airflow_process.wait(timeout=3) + + +def check_golden_file( + pytestconfig: pytest.Config, + output_path: pathlib.Path, + golden_path: pathlib.Path, + ignore_paths: Sequence[str] = (), +) -> None: + update_golden = pytestconfig.getoption("--update-golden-files") + + assert_metadata_files_equal( + output_path=output_path, + golden_path=golden_path, + update_golden=update_golden, + copy_output=False, + ignore_paths=ignore_paths, + ignore_order=False, + ) + + +@dataclasses.dataclass +class DagTestCase: + dag_id: str + success: bool = True + + v2_only: bool = False + + +test_cases = [ + DagTestCase("simple_dag"), + DagTestCase("basic_iolets"), + DagTestCase("snowflake_operator", success=False, v2_only=True), + DagTestCase("sqlite_operator", v2_only=True), +] + + +@pytest.mark.parametrize( + ["golden_filename", "test_case", "is_v1"], + [ + # On Airflow <= 2.2, test plugin v1. + *[ + pytest.param( + f"v1_{test_case.dag_id}", + test_case, + True, + id=f"v1_{test_case.dag_id}", + marks=pytest.mark.skipif( + HAS_AIRFLOW_LISTENER_API, + reason="Not testing plugin v1 on newer Airflow versions", + ), + ) + for test_case in test_cases + if not test_case.v2_only + ], + *[ + pytest.param( + # On Airflow 2.3-2.4, test plugin v2 without dataFlows. 
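+                # (Per the comment above, those runs compare against a separate
+                # `_no_dag_listener` golden file, since dataFlow-level metadata
+                # isn't captured without the DAG listener API.)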
+ f"v2_{test_case.dag_id}" + if HAS_AIRFLOW_DAG_LISTENER_API + else f"v2_{test_case.dag_id}_no_dag_listener", + test_case, + False, + id=f"v2_{test_case.dag_id}" + if HAS_AIRFLOW_DAG_LISTENER_API + else f"v2_{test_case.dag_id}_no_dag_listener", + marks=pytest.mark.skipif( + not HAS_AIRFLOW_LISTENER_API, + reason="Cannot test plugin v2 without the Airflow plugin listener API", + ), + ) + for test_case in test_cases + ], + ], +) +def test_airflow_plugin( + pytestconfig: pytest.Config, + tmp_path: pathlib.Path, + golden_filename: str, + test_case: DagTestCase, + is_v1: bool, +) -> None: + # This test: + # - Configures the plugin. + # - Starts a local airflow instance in a subprocess. + # - Runs a DAG that uses an operator supported by the extractor. + # - Waits for the DAG to complete. + # - Validates the metadata generated against a golden file. + + if not is_v1 and not test_case.success and not HAS_AIRFLOW_DAG_LISTENER_API: + # Saw a number of issues in CI where this would fail to emit the last events + # due to an error in the SQLAlchemy listener. This never happened locally for me. + pytest.skip("Cannot test failure cases without the Airflow DAG listener API") + + golden_path = GOLDENS_FOLDER / f"{golden_filename}.json" + dag_id = test_case.dag_id + + with _run_airflow( + tmp_path, dags_folder=DAGS_FOLDER, is_v1=is_v1 + ) as airflow_instance: + print(f"Running DAG {dag_id}...") + subprocess.check_call( + [ + "airflow", + "dags", + "trigger", + "--exec-date", + "2023-09-27T21:34:38+00:00", + "-r", + "manual_run_test", + dag_id, + ], + env=airflow_instance.env_vars, + ) + + print("Waiting for DAG to finish...") + _wait_for_dag_finish( + airflow_instance, dag_id, require_success=test_case.success + ) + + print("Sleeping for a few seconds to let the plugin finish...") + time.sleep(10) + + check_golden_file( + pytestconfig=pytestconfig, + output_path=airflow_instance.metadata_file, + golden_path=golden_path, + ignore_paths=[ + # Timing-related items. + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['start_date'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['end_date'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['duration'\]", + # Host-specific items. + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['pid'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['hostname'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['unixname'\]", + # TODO: If we switched to Git urls, maybe we could get this to work consistently. + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['fileloc'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", + ], + ) + + +if __name__ == "__main__": + # When run directly, just set up a local airflow instance. 
+ import tempfile + + with _run_airflow( + tmp_path=pathlib.Path(tempfile.mkdtemp("airflow-plugin-test")), + dags_folder=DAGS_FOLDER, + is_v1=not HAS_AIRFLOW_LISTENER_API, + ) as airflow_instance: + # input("Press enter to exit...") + breakpoint() + print("quitting airflow") diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py index 9aa901171cfa6..d8620e74d7e30 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py @@ -14,18 +14,21 @@ import pytest from airflow.lineage import apply_lineage, prepare_lineage from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance -from datahub_provider import get_provider_info -from datahub_provider._airflow_shims import AIRFLOW_PATCHED, EmptyOperator -from datahub_provider.entities import Dataset, Urn -from datahub_provider.hooks.datahub import DatahubKafkaHook, DatahubRestHook -from datahub_provider.operators.datahub import DatahubEmitterOperator + +from datahub_airflow_plugin import get_provider_info +from datahub_airflow_plugin._airflow_shims import ( + AIRFLOW_PATCHED, + AIRFLOW_VERSION, + EmptyOperator, +) +from datahub_airflow_plugin.entities import Dataset, Urn +from datahub_airflow_plugin.hooks.datahub import DatahubKafkaHook, DatahubRestHook +from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator assert AIRFLOW_PATCHED # TODO: Remove default_view="tree" arg. Figure out why is default_view being picked as "grid" and how to fix it ? -# Approach suggested by https://stackoverflow.com/a/11887885/5004662. -AIRFLOW_VERSION = packaging.version.parse(airflow.version.version) lineage_mce = builder.make_lineage_mce( [ @@ -105,7 +108,7 @@ def test_datahub_rest_hook(mock_emitter): mock_emitter.assert_called_once_with(config.host, None, None) instance = mock_emitter.return_value - instance.emit_mce.assert_called_with(lineage_mce) + instance.emit.assert_called_with(lineage_mce) @mock.patch("datahub.emitter.rest_emitter.DatahubRestEmitter", autospec=True) @@ -119,7 +122,7 @@ def test_datahub_rest_hook_with_timeout(mock_emitter): mock_emitter.assert_called_once_with(config.host, None, 5) instance = mock_emitter.return_value - instance.emit_mce.assert_called_with(lineage_mce) + instance.emit.assert_called_with(lineage_mce) @mock.patch("datahub.emitter.kafka_emitter.DatahubKafkaEmitter", autospec=True) @@ -131,11 +134,11 @@ def test_datahub_kafka_hook(mock_emitter): mock_emitter.assert_called_once() instance = mock_emitter.return_value - instance.emit_mce_async.assert_called() + instance.emit.assert_called() instance.flush.assert_called_once() -@mock.patch("datahub_provider.hooks.datahub.DatahubRestHook.emit_mces") +@mock.patch("datahub_provider.hooks.datahub.DatahubRestHook.emit") def test_datahub_lineage_operator(mock_emit): with patch_airflow_connection(datahub_rest_connection_config) as config: assert config.conn_id diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py deleted file mode 100644 index 10cf3ad0a608a..0000000000000 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_dummy(): - pass diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py new file mode 100644 
index 0000000000000..1d0ce5835f958 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py @@ -0,0 +1,8 @@ +import setuptools + + +def test_package_list_match_inits(): + where = "./src" + package_list = set(setuptools.find_packages(where)) + namespace_packages = set(setuptools.find_namespace_packages(where)) + assert package_list == namespace_packages, "are you missing a package init file?" diff --git a/metadata-ingestion-modules/airflow-plugin/tox.ini b/metadata-ingestion-modules/airflow-plugin/tox.ini index 6a1c06aed8cdd..2f05854940d10 100644 --- a/metadata-ingestion-modules/airflow-plugin/tox.ini +++ b/metadata-ingestion-modules/airflow-plugin/tox.ini @@ -4,32 +4,23 @@ # and then run "tox" from this directory. [tox] -envlist = py3-quick,py3-full - -[gh-actions] -python = - 3.6: py3-full - 3.9: py3-full - -# Providing optional features that add dependencies from setup.py as deps here -# allows tox to recreate testenv when new dependencies are added to setup.py. -# Previous approach of using the tox global setting extras is not recommended -# as extras is only called when the testenv is created for the first time! -# see more here -> https://github.com/tox-dev/tox/issues/1105#issuecomment-448596282 +envlist = py38-airflow21, py38-airflow22, py310-airflow24, py310-airflow26, py310-airflow27 [testenv] -deps = - -e ../../metadata-ingestion/[.dev] +use_develop = true +extras = dev,integration-tests,plugin-v1 +deps = + -e ../../metadata-ingestion/ + # Airflow version + airflow21: apache-airflow~=2.1.0 + airflow22: apache-airflow~=2.2.0 + airflow24: apache-airflow~=2.4.0 + airflow26: apache-airflow~=2.6.0 + airflow27: apache-airflow~=2.7.0 commands = - pytest --cov={envsitepackagesdir}/datahub --cov={envsitepackagesdir}/datahub_provider \ - py3-quick: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \ - py3-full: --cov-fail-under 65 --junit-xml=junit.full.xml \ - --continue-on-collection-errors \ - -vv + pytest --cov-append {posargs} -setenv = - AIRFLOW_HOME = /tmp/airflow/thisshouldnotexist-{envname} +# For Airflow 2.4+, add the plugin-v2 extra. +[testenv:py310-airflow{24,26,27}] +extras = dev,integration-tests,plugin-v2 -[testenv:py3-full] -deps = - ../../metadata-ingestion/.[dev] diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 8fb7b5f29cc22..34afa8cdb39a4 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -1,4 +1,3 @@ -import os import sys from typing import Dict, Set @@ -9,16 +8,9 @@ exec(fp.read(), package_metadata) -def get_long_description(): - root = os.path.dirname(__file__) - with open(os.path.join(root, "README.md")) as f: - description = f.read() - - return description - - base_requirements = { - "typing_extensions>=3.10.0.2", + # Typing extension should be >=3.10.0.2 ideally but we can't restrict due to a Airflow 2.1 dependency conflict. + "typing_extensions>=3.7.4.3", "mypy_extensions>=0.4.3", # Actual dependencies. "typing-inspect", @@ -270,6 +262,7 @@ def get_long_description(): # Sink plugins. 
"datahub-kafka": kafka_common, "datahub-rest": rest_common, + "sync-file-emitter": {"filelock"}, "datahub-lite": { "duckdb", "fastapi", @@ -670,7 +663,12 @@ def get_long_description(): }, license="Apache License 2.0", description="A CLI to work with DataHub metadata", - long_description=get_long_description(), + long_description="""\ +The `acryl-datahub` package contains a CLI and SDK for interacting with DataHub, +as well as an integration framework for pulling/pushing metadata from external systems. + +See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion). +""", long_description_content_type="text/markdown", classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py index 796786beba21b..a898e35bb810e 100644 --- a/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py +++ b/metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Union +from typing import Callable, Iterable, List, Optional, Union import pydantic from pydantic import BaseModel @@ -11,9 +11,10 @@ from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig from datahub.configuration.common import ConfigurationError from datahub.configuration.validate_field_rename import pydantic_renamed_field +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( CorpGroupEditableInfoClass, CorpGroupInfoClass, @@ -25,9 +26,6 @@ _Aspect, ) -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - logger = logging.getLogger(__name__) @@ -194,30 +192,9 @@ def generate_mcp( entityUrn=urn, aspect=StatusClass(removed=False) ) - @staticmethod - def _datahub_graph_from_datahub_rest_emitter( - rest_emitter: DatahubRestEmitter, - ) -> DataHubGraph: - """ - Create a datahub graph instance from a REST Emitter. 
- A stop-gap implementation which is expected to be removed after PATCH support is implemented - for membership updates for users <-> groups - """ - graph = DataHubGraph( - config=DatahubClientConfig( - server=rest_emitter._gms_server, - token=rest_emitter._token, - timeout_sec=rest_emitter._connect_timeout_sec, - retry_status_codes=rest_emitter._retry_status_codes, - extra_headers=rest_emitter._session.headers, - disable_ssl_verification=rest_emitter._session.verify is False, - ) - ) - return graph - def emit( self, - emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ @@ -235,7 +212,7 @@ def emit( # who are passing in a DataHubRestEmitter today # we won't need this in the future once PATCH support is implemented as all emitters # will work - datahub_graph = self._datahub_graph_from_datahub_rest_emitter(emitter) + datahub_graph = emitter.to_graph() for mcp in self.generate_mcp( generation_config=CorpGroupGenerationConfig( override_editable=self.overrideEditable, datahub_graph=datahub_graph diff --git a/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py index c67eb02a870a5..9fe1ebedafca7 100644 --- a/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py +++ b/metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py @@ -1,14 +1,14 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Union +from typing import Callable, Iterable, List, Optional import pydantic import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.schema_classes import ( CorpUserEditableInfoClass, CorpUserInfoClass, @@ -16,9 +16,6 @@ StatusClass, ) -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - @dataclass class CorpUserGenerationConfig: @@ -144,7 +141,7 @@ def generate_mcp( def emit( self, - emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py index 8a04768bc0a72..acd708ee81a5c 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py @@ -1,18 +1,9 @@ import logging from dataclasses import dataclass, field -from typing import ( - TYPE_CHECKING, - Callable, - Dict, - Iterable, - List, - Optional, - Set, - Union, - cast, -) +from typing import Callable, Dict, Iterable, List, Optional, Set, cast import datahub.emitter.mce_builder as builder +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( AuditStampClass, @@ -29,10 +20,6 @@ ) from datahub.utilities.urns.data_flow_urn import DataFlowUrn -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - from datahub.emitter.rest_emitter import DatahubRestEmitter - logger = logging.getLogger(__name__) @@ -170,7 +157,7 @@ def generate_mcp(self) -> 
Iterable[MetadataChangeProposalWrapper]: def emit( self, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py index 7eb6fc8c8d1a9..0face6415bacc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py @@ -1,16 +1,16 @@ from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Set, Union +from typing import Callable, Dict, Iterable, List, Optional, Set import datahub.emitter.mce_builder as builder +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( AuditStampClass, AzkabanJobTypeClass, DataJobInfoClass, DataJobInputOutputClass, - DataJobSnapshotClass, + FineGrainedLineageClass, GlobalTagsClass, - MetadataChangeEventClass, OwnerClass, OwnershipClass, OwnershipSourceClass, @@ -23,10 +23,6 @@ from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.dataset_urn import DatasetUrn -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - from datahub.emitter.rest_emitter import DatahubRestEmitter - @dataclass class DataJob: @@ -59,6 +55,7 @@ class DataJob: group_owners: Set[str] = field(default_factory=set) inlets: List[DatasetUrn] = field(default_factory=list) outlets: List[DatasetUrn] = field(default_factory=list) + fine_grained_lineages: List[FineGrainedLineageClass] = field(default_factory=list) upstream_urns: List[DataJobUrn] = field(default_factory=list) def __post_init__(self): @@ -103,31 +100,6 @@ def generate_tags_aspect(self) -> Iterable[GlobalTagsClass]: ) return [tags] - def generate_mce(self) -> MetadataChangeEventClass: - job_mce = MetadataChangeEventClass( - proposedSnapshot=DataJobSnapshotClass( - urn=str(self.urn), - aspects=[ - DataJobInfoClass( - name=self.name if self.name is not None else self.id, - type=AzkabanJobTypeClass.COMMAND, - description=self.description, - customProperties=self.properties, - externalUrl=self.url, - ), - DataJobInputOutputClass( - inputDatasets=[str(urn) for urn in self.inlets], - outputDatasets=[str(urn) for urn in self.outlets], - inputDatajobs=[str(urn) for urn in self.upstream_urns], - ), - *self.generate_ownership_aspect(), - *self.generate_tags_aspect(), - ], - ) - ) - - return job_mce - def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]: mcp = MetadataChangeProposalWrapper( entityUrn=str(self.urn), @@ -159,7 +131,7 @@ def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]: def emit( self, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ @@ -179,6 +151,7 @@ def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapp inputDatasets=[str(urn) for urn in self.inlets], outputDatasets=[str(urn) for urn in self.outlets], inputDatajobs=[str(urn) for urn in self.upstream_urns], + fineGrainedLineages=self.fine_grained_lineages, ), ) yield mcp diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index 9ec389c3a0989..cf6080c7072e6 
100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -1,9 +1,10 @@ import time from dataclasses import dataclass, field from enum import Enum -from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Union, cast +from typing import Callable, Dict, Iterable, List, Optional, Union, cast from datahub.api.entities.datajob import DataFlow, DataJob +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import DatahubKey from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import ( @@ -26,10 +27,6 @@ from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn from datahub.utilities.urns.dataset_urn import DatasetUrn -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - from datahub.emitter.rest_emitter import DatahubRestEmitter - class DataProcessInstanceKey(DatahubKey): cluster: str @@ -106,7 +103,7 @@ def start_event_mcp( def emit_process_start( self, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, start_timestamp_millis: int, attempt: Optional[int] = None, emit_template: bool = True, @@ -197,7 +194,7 @@ def end_event_mcp( def emit_process_end( self, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, end_timestamp_millis: int, result: InstanceRunResult, result_type: Optional[str] = None, @@ -207,7 +204,7 @@ def emit_process_end( """ Generate an DataProcessInstance finish event and emits is - :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps + :param emitter: (Emitter) the datahub emitter to emit generated mcps :param end_timestamp_millis: (int) the end time of the execution in milliseconds :param result: (InstanceRunResult) The result of the run :param result_type: (string) It identifies the system where the native result comes from like Airflow, Azkaban @@ -261,24 +258,24 @@ def generate_mcp( @staticmethod def _emit_mcp( mcp: MetadataChangeProposalWrapper, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ - :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps + :param emitter: (Emitter) the datahub emitter to emit generated mcps :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used """ emitter.emit(mcp, callback) def emit( self, - emitter: Union["DatahubRestEmitter", "DatahubKafkaEmitter"], + emitter: Emitter, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: """ - :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps + :param emitter: (Emitter) the datahub emitter to emit generated mcps :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used """ for mcp in self.generate_mcp(): diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 04f12b4f61d1e..2d9b14ceb2d06 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py 
@@ -2,25 +2,15 @@ import time from pathlib import Path -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Union, -) +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import pydantic from ruamel.yaml import YAML import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( AuditStampClass, @@ -43,9 +33,6 @@ from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.urns.urn import Urn -if TYPE_CHECKING: - from datahub.emitter.kafka_emitter import DatahubKafkaEmitter - def patch_list( orig_list: Optional[list], @@ -225,7 +212,6 @@ def _generate_properties_mcp( def generate_mcp( self, upsert: bool ) -> Iterable[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: - if self._resolved_domain_urn is None: raise Exception( f"Unable to generate MCP-s because we were unable to resolve the domain {self.domain} to an urn." @@ -282,7 +268,7 @@ def generate_mcp( def emit( self, - emitter: Union[DatahubRestEmitter, "DatahubKafkaEmitter"], + emitter: Emitter, upsert: bool, callback: Optional[Callable[[Exception, str], None]] = None, ) -> None: @@ -440,7 +426,6 @@ def patch_yaml( original_dataproduct: DataProduct, output_file: Path, ) -> bool: - update_needed = False if not original_dataproduct._original_yaml_dict: raise Exception("Original Data Product was not loaded from yaml") @@ -523,7 +508,6 @@ def to_yaml( self, file: Path, ) -> None: - with open(file, "w") as fp: yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) yaml.indent(mapping=2, sequence=4, offset=2) diff --git a/metadata-ingestion/src/datahub/emitter/generic_emitter.py b/metadata-ingestion/src/datahub/emitter/generic_emitter.py new file mode 100644 index 0000000000000..28138c6182758 --- /dev/null +++ b/metadata-ingestion/src/datahub/emitter/generic_emitter.py @@ -0,0 +1,31 @@ +from typing import Any, Callable, Optional, Union + +from typing_extensions import Protocol + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( + MetadataChangeEvent, + MetadataChangeProposal, +) + + +class Emitter(Protocol): + def emit( + self, + item: Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ], + # NOTE: This signature should have the exception be optional rather than + # required. However, this would be a breaking change that may need + # more careful consideration. + callback: Optional[Callable[[Exception, str], None]] = None, + # TODO: The rest emitter returns timestamps as the return type. For now + # we smooth over that detail using Any, but eventually we should + # standardize on a return type. 
+ ) -> Any: + raise NotImplementedError + + def flush(self) -> None: + pass diff --git a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py index ec0c8f3418a4a..781930011b78f 100644 --- a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py @@ -10,6 +10,7 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.kafka import KafkaProducerConnectionConfig from datahub.configuration.validate_field_rename import pydantic_renamed_field +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.closeable import Closeable from datahub.metadata.schema_classes import ( @@ -55,7 +56,7 @@ def validate_topic_routes(cls, v: Dict[str, str]) -> Dict[str, str]: return v -class DatahubKafkaEmitter(Closeable): +class DatahubKafkaEmitter(Closeable, Emitter): def __init__(self, config: KafkaEmitterConfig): self.config = config schema_registry_conf = { diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 937e0902d6d8c..afb19df9791af 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -4,7 +4,7 @@ import logging import os from json.decoder import JSONDecodeError -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import requests from deprecated import deprecated @@ -13,6 +13,7 @@ from datahub.cli.cli_utils import get_system_auth from datahub.configuration.common import ConfigurationError, OperationalError +from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.request_helper import make_curl_command from datahub.emitter.serialization_helper import pre_json_transform @@ -23,6 +24,9 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.usage import UsageAggregation +if TYPE_CHECKING: + from datahub.ingestion.graph.client import DataHubGraph + logger = logging.getLogger(__name__) _DEFAULT_CONNECT_TIMEOUT_SEC = 30 # 30 seconds should be plenty to connect @@ -42,7 +46,7 @@ ) -class DataHubRestEmitter(Closeable): +class DataHubRestEmitter(Closeable, Emitter): _gms_server: str _token: Optional[str] _session: requests.Session @@ -190,6 +194,11 @@ def test_connection(self) -> dict: message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually :8080) or Frontend GMS API (usually :9002/api/gms)." raise ConfigurationError(message) + def to_graph(self) -> "DataHubGraph": + from datahub.ingestion.graph.client import DataHubGraph + + return DataHubGraph.from_emitter(self) + def emit( self, item: Union[ @@ -198,9 +207,6 @@ def emit( MetadataChangeProposalWrapper, UsageAggregation, ], - # NOTE: This signature should have the exception be optional rather than - # required. However, this would be a breaking change that may need - # more careful consideration. 
callback: Optional[Callable[[Exception, str], None]] = None, ) -> Tuple[datetime.datetime, datetime.datetime]: start_time = datetime.datetime.now() diff --git a/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py b/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py new file mode 100644 index 0000000000000..f82882f1a87cc --- /dev/null +++ b/metadata-ingestion/src/datahub/emitter/synchronized_file_emitter.py @@ -0,0 +1,60 @@ +import logging +import pathlib +from typing import Callable, Optional, Union + +import filelock + +from datahub.emitter.generic_emitter import Emitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.closeable import Closeable +from datahub.ingestion.sink.file import write_metadata_file +from datahub.ingestion.source.file import read_metadata_file +from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( + MetadataChangeEvent, + MetadataChangeProposal, +) + +logger = logging.getLogger(__name__) + + +class SynchronizedFileEmitter(Closeable, Emitter): + """ + A multiprocessing-safe emitter that writes to a file. + + This emitter is intended for testing purposes only. It is not performant + because it reads and writes the full file on every emit call to ensure + that the file is always valid JSON. + """ + + def __init__(self, filename: str) -> None: + self._filename = pathlib.Path(filename) + self._lock = filelock.FileLock(self._filename.with_suffix(".lock")) + + def emit( + self, + item: Union[ + MetadataChangeEvent, MetadataChangeProposal, MetadataChangeProposalWrapper + ], + callback: Optional[Callable[[Exception, str], None]] = None, + ) -> None: + with self._lock: + if self._filename.exists(): + metadata = list(read_metadata_file(self._filename)) + else: + metadata = [] + + logger.debug("Emitting metadata: %s", item) + metadata.append(item) + + write_metadata_file(self._filename, metadata) + + def __repr__(self) -> str: + return f"SynchronizedFileEmitter('{self._filename}')" + + def flush(self) -> None: + # No-op. + pass + + def close(self) -> None: + # No-op. + pass diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 673ada4f73051..5120d4f643c94 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -138,6 +138,23 @@ def __init__(self, config: DatahubClientConfig) -> None: self.server_id = "missing" logger.debug(f"Failed to get server id due to {e}") + @classmethod + def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph": + return cls( + DatahubClientConfig( + server=emitter._gms_server, + token=emitter._token, + timeout_sec=emitter._read_timeout_sec, + retry_status_codes=emitter._retry_status_codes, + retry_max_times=emitter._retry_max_times, + extra_headers=emitter._session.headers, + disable_ssl_verification=emitter._session.verify is False, + # TODO: Support these headers. 
+ # ca_certificate_path=emitter._ca_certificate_path, + # client_certificate_path=emitter._client_certificate_path, + ) + ) + def _send_restli_request(self, method: str, url: str, **kwargs: Any) -> Dict: try: response = self._session.request(method, url, **kwargs) diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index f3344782917ab..5fae0ee5215a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -28,7 +28,9 @@ ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.sql.sql_common import get_platform_from_sqlalchemy_uri +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, StaleEntityRemovalSourceReport, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 112defe76d957..056be6c2e50ac 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1,12 +1,10 @@ import datetime import logging import traceback -from collections import OrderedDict from dataclasses import dataclass, field from typing import ( TYPE_CHECKING, Any, - Callable, Dict, Iterable, List, @@ -103,52 +101,6 @@ MISSING_COLUMN_INFO = "missing column information" -def _platform_alchemy_uri_tester_gen( - platform: str, opt_starts_with: Optional[str] = None -) -> Tuple[str, Callable[[str], bool]]: - return platform, lambda x: x.startswith( - platform if not opt_starts_with else opt_starts_with - ) - - -PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP: Dict[str, Callable[[str], bool]] = OrderedDict( - [ - _platform_alchemy_uri_tester_gen("athena", "awsathena"), - _platform_alchemy_uri_tester_gen("bigquery"), - _platform_alchemy_uri_tester_gen("clickhouse"), - _platform_alchemy_uri_tester_gen("druid"), - _platform_alchemy_uri_tester_gen("hana"), - _platform_alchemy_uri_tester_gen("hive"), - _platform_alchemy_uri_tester_gen("mongodb"), - _platform_alchemy_uri_tester_gen("mssql"), - _platform_alchemy_uri_tester_gen("mysql"), - _platform_alchemy_uri_tester_gen("oracle"), - _platform_alchemy_uri_tester_gen("pinot"), - _platform_alchemy_uri_tester_gen("presto"), - ( - "redshift", - lambda x: ( - x.startswith(("jdbc:postgres:", "postgresql")) - and x.find("redshift.amazonaws") > 0 - ) - or x.startswith("redshift"), - ), - # Don't move this before redshift. 
- _platform_alchemy_uri_tester_gen("postgres", "postgresql"), - _platform_alchemy_uri_tester_gen("snowflake"), - _platform_alchemy_uri_tester_gen("trino"), - _platform_alchemy_uri_tester_gen("vertica"), - ] -) - - -def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str: - for platform, tester in PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP.items(): - if tester(sqlalchemy_uri): - return platform - return "external" - - @dataclass class SQLSourceReport(StaleEntityRemovalSourceReport): tables_scanned: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py new file mode 100644 index 0000000000000..b6a463837228d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py @@ -0,0 +1,47 @@ +from collections import OrderedDict +from typing import Callable, Dict, Optional, Tuple + + +def _platform_alchemy_uri_tester_gen( + platform: str, opt_starts_with: Optional[str] = None +) -> Tuple[str, Callable[[str], bool]]: + return platform, lambda x: x.startswith(opt_starts_with or platform) + + +PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP: Dict[str, Callable[[str], bool]] = OrderedDict( + [ + _platform_alchemy_uri_tester_gen("athena", "awsathena"), + _platform_alchemy_uri_tester_gen("bigquery"), + _platform_alchemy_uri_tester_gen("clickhouse"), + _platform_alchemy_uri_tester_gen("druid"), + _platform_alchemy_uri_tester_gen("hana"), + _platform_alchemy_uri_tester_gen("hive"), + _platform_alchemy_uri_tester_gen("mongodb"), + _platform_alchemy_uri_tester_gen("mssql"), + _platform_alchemy_uri_tester_gen("mysql"), + _platform_alchemy_uri_tester_gen("oracle"), + _platform_alchemy_uri_tester_gen("pinot"), + _platform_alchemy_uri_tester_gen("presto"), + ( + "redshift", + lambda x: ( + x.startswith(("jdbc:postgres:", "postgresql")) + and x.find("redshift.amazonaws") > 0 + ) + or x.startswith("redshift"), + ), + # Don't move this before redshift. 
+ _platform_alchemy_uri_tester_gen("postgres", "postgresql"), + _platform_alchemy_uri_tester_gen("snowflake"), + _platform_alchemy_uri_tester_gen("sqlite"), + _platform_alchemy_uri_tester_gen("trino"), + _platform_alchemy_uri_tester_gen("vertica"), + ] +) + + +def get_platform_from_sqlalchemy_uri(sqlalchemy_uri: str) -> str: + for platform, tester in PLATFORM_TO_SQLALCHEMY_URI_TESTER_MAP.items(): + if tester(sqlalchemy_uri): + return platform + return "external" diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 2a4563439b6ba..14bc4242d2a91 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -21,7 +21,9 @@ ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.sql import sql_common +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, StaleEntityRemovalSourceReport, @@ -202,7 +204,7 @@ def get_platform_from_database_id(self, database_id): sqlalchemy_uri = database_response.get("result", {}).get("sqlalchemy_uri") if sqlalchemy_uri is None: return database_response.get("result", {}).get("backend", "external") - return sql_common.get_platform_from_sqlalchemy_uri(sqlalchemy_uri) + return get_platform_from_sqlalchemy_uri(sqlalchemy_uri) @lru_cache(maxsize=None) def get_datasource_urn_from_id(self, datasource_id): diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 4cc00a66116e9..6214cba342622 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -1179,8 +1179,6 @@ def get_upstream_fields_of_field_in_datasource( def get_upstream_fields_from_custom_sql( self, datasource: dict, datasource_urn: str ) -> List[FineGrainedLineage]: - fine_grained_lineages: List[FineGrainedLineage] = [] - parsed_result = self.parse_custom_sql( datasource=datasource, datasource_urn=datasource_urn, @@ -1194,13 +1192,20 @@ def get_upstream_fields_from_custom_sql( logger.info( f"Failed to extract column level lineage from datasource {datasource_urn}" ) - return fine_grained_lineages + return [] + if parsed_result.debug_info.error: + logger.info( + f"Failed to extract column level lineage from datasource {datasource_urn}: {parsed_result.debug_info.error}" + ) + return [] cll: List[ColumnLineageInfo] = ( parsed_result.column_lineage if parsed_result.column_lineage is not None else [] ) + + fine_grained_lineages: List[FineGrainedLineage] = [] for cll_info in cll: downstream = ( [ diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index eabf62a4cda2b..f116550328819 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -35,7 +35,9 @@ from datahub.cli.cli_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.ingestion.source.sql.sql_common import get_platform_from_sqlalchemy_uri +from 
datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( AssertionInfo, AssertionResult, diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 5c52e1ab4f0b3..54f6a6e984c00 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -40,6 +40,7 @@ def assert_metadata_files_equal( update_golden: bool, copy_output: bool, ignore_paths: Sequence[str] = (), + ignore_order: bool = True, ) -> None: golden_exists = os.path.isfile(golden_path) @@ -65,7 +66,7 @@ def assert_metadata_files_equal( write_metadata_file(pathlib.Path(temp.name), golden_metadata) golden = load_json_file(temp.name) - diff = diff_metadata_json(output, golden, ignore_paths) + diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: if isinstance(diff, MCPDiff): diff.apply_delta(golden) @@ -91,16 +92,19 @@ def diff_metadata_json( output: MetadataJson, golden: MetadataJson, ignore_paths: Sequence[str] = (), + ignore_order: bool = True, ) -> Union[DeepDiff, MCPDiff]: ignore_paths = (*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info") try: - golden_map = get_aspects_by_urn(golden) - output_map = get_aspects_by_urn(output) - return MCPDiff.create( - golden=golden_map, - output=output_map, - ignore_paths=ignore_paths, - ) + if ignore_order: + golden_map = get_aspects_by_urn(golden) + output_map = get_aspects_by_urn(output) + return MCPDiff.create( + golden=golden_map, + output=output_map, + ignore_paths=ignore_paths, + ) + # if ignore_order is False, always use DeepDiff except CannotCompareMCPs as e: logger.info(f"{e}, falling back to MCE diff") except AssertionError as e: @@ -111,5 +115,5 @@ def diff_metadata_json( golden, output, exclude_regex_paths=ignore_paths, - ignore_order=True, + ignore_order=ignore_order, ) diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index f18235af3d1fd..4b3090eaaad31 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -231,6 +231,13 @@ def _table_level_lineage( # In some cases like "MERGE ... then INSERT (col1, col2) VALUES (col1, col2)", # the `this` on the INSERT part isn't a table. if isinstance(expr.this, sqlglot.exp.Table) + } | { + # For CREATE DDL statements, the table name is nested inside + # a Schema object. 
+ _TableName.from_sqlglot_table(expr.this.this) + for expr in statement.find_all(sqlglot.exp.Create) + if isinstance(expr.this, sqlglot.exp.Schema) + and isinstance(expr.this.this, sqlglot.exp.Table) } tables = ( @@ -242,7 +249,7 @@ def _table_level_lineage( - modified # ignore CTEs created in this statement - { - _TableName(database=None, schema=None, table=cte.alias_or_name) + _TableName(database=None, db_schema=None, table=cte.alias_or_name) for cte in statement.find_all(sqlglot.exp.CTE) } ) @@ -906,32 +913,39 @@ def create_lineage_sql_parsed_result( env: str, schema: Optional[str] = None, graph: Optional[DataHubGraph] = None, -) -> Optional["SqlParsingResult"]: - parsed_result: Optional["SqlParsingResult"] = None +) -> SqlParsingResult: + needs_close = False try: - schema_resolver = ( - graph._make_schema_resolver( + if graph: + schema_resolver = graph._make_schema_resolver( platform=platform, platform_instance=platform_instance, env=env, ) - if graph is not None - else SchemaResolver( + else: + needs_close = True + schema_resolver = SchemaResolver( platform=platform, platform_instance=platform_instance, env=env, graph=None, ) - ) - parsed_result = sqlglot_lineage( + return sqlglot_lineage( query, schema_resolver=schema_resolver, default_db=database, default_schema=schema, ) except Exception as e: - logger.debug(f"Fail to prase query {query}", exc_info=e) - logger.warning("Fail to parse custom SQL") - - return parsed_result + return SqlParsingResult( + in_tables=[], + out_tables=[], + column_lineage=None, + debug_info=SqlParsingDebugInfo( + table_error=e, + ), + ) + finally: + if needs_close: + schema_resolver.close() diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json new file mode 100644 index 0000000000000..4773974545bfa --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json @@ -0,0 +1,8 @@ +{ + "query_type": "CREATE", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)" + ], + "column_lineage": null +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 483c1ac4cc7f9..2a965a9bb1e61 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -274,6 +274,21 @@ def test_expand_select_star_basic(): ) +def test_create_table_ddl(): + assert_sql_result( + """ +CREATE TABLE IF NOT EXISTS costs ( + id INTEGER PRIMARY KEY, + month TEXT NOT NULL, + total_cost REAL NOT NULL, + area REAL NOT NULL +) +""", + dialect="sqlite", + expected_file=RESOURCE_DIR / "test_create_table_ddl.json", + ) + + def test_snowflake_column_normalization(): # Technically speaking this is incorrect since the column names are different and both quoted. 
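Editor's note: the Create-statement handling added above relies on sqlglot nesting a CREATE TABLE's name inside a Schema node, which is why `_table_level_lineage` reads `expr.this.this` for `sqlglot.exp.Create` nodes. A minimal sketch of that shape (not part of the patch; assumes a sqlglot version compatible with the one the repo pins):

```python
import sqlglot
from sqlglot import exp

# A CREATE TABLE statement parses as Create -> Schema -> Table,
# so the table name sits two levels down at expr.this.this.
statement = sqlglot.parse_one(
    "CREATE TABLE IF NOT EXISTS costs (id INTEGER PRIMARY KEY, total_cost REAL NOT NULL)",
    read="sqlite",
)
create = next(statement.find_all(exp.Create))
assert isinstance(create.this, exp.Schema)
assert isinstance(create.this.this, exp.Table)
print(create.this.this.name)  # -> "costs"
```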
diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py index 95af0e623e991..808b38192411d 100644 --- a/metadata-ingestion/tests/unit/test_sql_common.py +++ b/metadata-ingestion/tests/unit/test_sql_common.py @@ -4,12 +4,11 @@ import pytest from sqlalchemy.engine.reflection import Inspector -from datahub.ingestion.source.sql.sql_common import ( - PipelineContext, - SQLAlchemySource, +from datahub.ingestion.source.sql.sql_common import PipelineContext, SQLAlchemySource +from datahub.ingestion.source.sql.sql_config import SQLCommonConfig +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( get_platform_from_sqlalchemy_uri, ) -from datahub.ingestion.source.sql.sql_config import SQLCommonConfig class _TestSQLAlchemyConfig(SQLCommonConfig): From e3780c2d75e4dc4dc95e83476d103a4454ee2aae Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 4 Oct 2023 16:23:31 +0530 Subject: [PATCH 083/156] =?UTF-8?q?feat(ingest/snowflake):=20initialize=20?= =?UTF-8?q?schema=20resolver=20from=20datahub=20for=20l=E2=80=A6=20(#8903)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/datahub/ingestion/graph/client.py | 8 ++--- .../ingestion/source/bigquery_v2/bigquery.py | 2 +- .../source/snowflake/snowflake_config.py | 4 +-- .../source/snowflake/snowflake_v2.py | 33 ++++++++++++------- .../datahub/ingestion/source/sql_queries.py | 5 ++- .../src/datahub/utilities/sqlglot_lineage.py | 5 +-- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 5120d4f643c94..ccff677c3a471 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from datetime import datetime from json.decoder import JSONDecodeError -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Type +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type from avro.schema import RecordSchema from deprecated import deprecated @@ -1010,14 +1010,13 @@ def _make_schema_resolver( def initialize_schema_resolver_from_datahub( self, platform: str, platform_instance: Optional[str], env: str - ) -> Tuple["SchemaResolver", Set[str]]: + ) -> "SchemaResolver": logger.info("Initializing schema resolver") schema_resolver = self._make_schema_resolver( platform, platform_instance, env, include_graph=False ) logger.info(f"Fetching schemas for platform {platform}, env {env}") - urns = [] count = 0 with PerfTimer() as timer: for urn, schema_info in self._bulk_fetch_schema_info_by_filter( @@ -1026,7 +1025,6 @@ def initialize_schema_resolver_from_datahub( env=env, ): try: - urns.append(urn) schema_resolver.add_graphql_schema_metadata(urn, schema_info) count += 1 except Exception: @@ -1041,7 +1039,7 @@ def initialize_schema_resolver_from_datahub( ) logger.info("Finished initializing schema resolver") - return schema_resolver, set(urns) + return schema_resolver def parse_sql_lineage( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 8a16b1a4a5f6b..f6adbcf033bcc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -458,7 +458,7 @@ def _init_schema_resolver(self) -> SchemaResolver: platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, - )[0] + ) else: logger.warning( "Failed to load schema info from DataHub as DataHubGraph is missing.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 95f6444384408..032bdef178fdf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -101,8 +101,8 @@ class SnowflakeV2Config( ) include_view_column_lineage: bool = Field( - default=False, - description="Populates view->view and table->view column lineage.", + default=True, + description="Populates view->view and table->view column lineage using DataHub's sql parser.", ) _check_role_grants_removed = pydantic_removed_field("check_role_grants") diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 240e0ffa1a0b6..215116b4c33fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -301,14 +301,11 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): # Caches tables for a single database. Consider moving to disk or S3 when possible. self.db_tables: Dict[str, List[SnowflakeTable]] = {} - self.sql_parser_schema_resolver = SchemaResolver( - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) self.view_definitions: FileBackedDict[str] = FileBackedDict() self.add_config_to_report() + self.sql_parser_schema_resolver = self._init_schema_resolver() + @classmethod def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": config = SnowflakeV2Config.parse_obj(config_dict) @@ -493,6 +490,24 @@ def query(query): return _report + def _init_schema_resolver(self) -> SchemaResolver: + if not self.config.include_technical_schema and self.config.parse_view_ddl: + if self.ctx.graph: + return self.ctx.graph.initialize_schema_resolver_from_datahub( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + else: + logger.warning( + "Failed to load schema info from DataHub as DataHubGraph is missing.", + ) + return SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), @@ -764,7 +779,7 @@ def _process_schema( ) self.db_tables[schema_name] = tables - if self.config.include_technical_schema or self.config.parse_view_ddl: + if self.config.include_technical_schema: for table in tables: yield from self._process_table(table, schema_name, db_name) @@ -776,7 +791,7 @@ def _process_schema( if view.view_definition: self.view_definitions[key] = view.view_definition - if self.config.include_technical_schema or self.config.parse_view_ddl: + if self.config.include_technical_schema: for view in views: yield from self._process_view(view, schema_name, db_name) @@ -892,8 +907,6 @@ def _process_table( yield from self._process_tag(tag) yield from 
self.gen_dataset_workunits(table, schema_name, db_name) - elif self.config.parse_view_ddl: - self.gen_schema_metadata(table, schema_name, db_name) def fetch_sample_data_for_classification( self, table: SnowflakeTable, schema_name: str, db_name: str, dataset_name: str @@ -1004,8 +1017,6 @@ def _process_view( yield from self._process_tag(tag) yield from self.gen_dataset_workunits(view, schema_name, db_name) - elif self.config.parse_view_ddl: - self.gen_schema_metadata(view, schema_name, db_name) def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: tag_identifier = tag.identifier() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index 2fcc93292c2ef..bce4d1ec76e6e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -103,13 +103,12 @@ def __init__(self, ctx: PipelineContext, config: SqlQueriesSourceConfig): self.builder = SqlParsingBuilder(usage_config=self.config.usage) if self.config.use_schema_resolver: - schema_resolver, urns = self.graph.initialize_schema_resolver_from_datahub( + self.schema_resolver = self.graph.initialize_schema_resolver_from_datahub( platform=self.config.platform, platform_instance=self.config.platform_instance, env=self.config.env, ) - self.schema_resolver = schema_resolver - self.urns = urns + self.urns = self.schema_resolver.get_urns() else: self.schema_resolver = self.graph._make_schema_resolver( platform=self.config.platform, diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 4b3090eaaad31..81c43884fdf7d 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -283,6 +283,9 @@ def __init__( shared_connection=shared_conn, ) + def get_urns(self) -> Set[str]: + return set(self._schema_cache.keys()) + def get_urn_for_table(self, table: _TableName, lower: bool = False) -> str: # TODO: Validate that this is the correct 2/3 layer hierarchy for the platform. 
@@ -397,8 +400,6 @@ def convert_graphql_schema_metadata_to_info( ) } - # TODO add a method to load all from graphql - def close(self) -> None: self._schema_cache.close() From 13508a9d888df519a389b6bd187b5f745772627b Mon Sep 17 00:00:00 2001 From: Upendra Rao Vedullapalli Date: Wed, 4 Oct 2023 15:20:51 +0200 Subject: [PATCH 084/156] feat(bigquery): excluding projects without any datasets from ingestion (#8535) Co-authored-by: Upendra Vedullapalli Co-authored-by: Andrew Sikowitz --- .../ingestion/source/bigquery_v2/bigquery.py | 19 +++++-- .../source/bigquery_v2/bigquery_config.py | 5 ++ .../source/bigquery_v2/bigquery_report.py | 2 + .../tests/unit/test_bigquery_source.py | 53 ++++++++++++++++++- 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index f6adbcf033bcc..fee181864a2d6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -600,9 +600,6 @@ def _process_project( db_views: Dict[str, List[BigqueryView]] = {} project_id = bigquery_project.id - - yield from self.gen_project_id_containers(project_id) - try: bigquery_project.datasets = ( self.bigquery_data_dictionary.get_datasets_for_project_id(project_id) @@ -619,11 +616,23 @@ def _process_project( return None if len(bigquery_project.datasets) == 0: - logger.warning( - f"No dataset found in {project_id}. Either there are no datasets in this project or missing bigquery.datasets.get permission. You can assign predefined roles/bigquery.metadataViewer role to your service account." + more_info = ( + "Either there are no datasets in this project or missing bigquery.datasets.get permission. " + "You can assign predefined roles/bigquery.metadataViewer role to your service account." ) + if self.config.exclude_empty_projects: + self.report.report_dropped(project_id) + warning_message = f"Excluded project '{project_id}' since no were datasets found. {more_info}" + else: + yield from self.gen_project_id_containers(project_id) + warning_message = ( + f"No datasets found in project '{project_id}'. {more_info}" + ) + logger.warning(warning_message) return + yield from self.gen_project_id_containers(project_id) + self.report.num_project_datasets_to_scan[project_id] = len( bigquery_project.datasets ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 3b06a4699c566..483355a85ac05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -265,6 +265,11 @@ def validate_column_lineage(cls, v: bool, values: Dict[str, Any]) -> bool: description="Maximum number of entries for the in-memory caches of FileBacked data structures.", ) + exclude_empty_projects: bool = Field( + default=False, + description="Option to exclude empty projects from being ingested.", + ) + @root_validator(pre=False) def profile_default_settings(cls, values: Dict) -> Dict: # Extra default SQLAlchemy option for better connection pooling and threading. 
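Editor's note: a short sketch of how the new `exclude_empty_projects` flag is expected to be set (not part of the patch; project ids are illustrative, mirroring the unit test added further below):

```python
from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config

# exclude_empty_projects defaults to False; enabling it drops projects
# that expose no datasets instead of emitting an empty container for them.
config = BigQueryV2Config.parse_obj(
    {
        "project_ids": ["my-project-with-data", "my-empty-project"],  # hypothetical ids
        "exclude_empty_projects": True,
    }
)
assert config.exclude_empty_projects is True
```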
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 661589a0c58e5..9d92b011ee285 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -122,6 +122,8 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR usage_state_size: Optional[str] = None + exclude_empty_projects: Optional[bool] = None + schema_api_perf: BigQuerySchemaApiPerfReport = field( default_factory=BigQuerySchemaApiPerfReport ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 4fc6c31626ba8..e9e91361f49f4 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -3,13 +3,14 @@ import os from datetime import datetime, timedelta, timezone from types import SimpleNamespace -from typing import Any, Dict, Optional, cast +from typing import Any, Dict, List, Optional, cast from unittest.mock import MagicMock, Mock, patch import pytest from google.api_core.exceptions import GoogleAPICallError from google.cloud.bigquery.table import Row, TableListItem +from datahub.configuration.common import AllowDenyPattern from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.bigquery_v2.bigquery import BigqueryV2Source from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( @@ -17,9 +18,13 @@ BigqueryTableIdentifier, BigQueryTableRef, ) -from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config +from datahub.ingestion.source.bigquery_v2.bigquery_config import ( + BigQueryConnectionConfig, + BigQueryV2Config, +) from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( + BigqueryDataset, BigqueryProject, BigQuerySchemaApi, BigqueryView, @@ -854,3 +859,47 @@ def test_get_table_name(full_table_name: str, datahub_full_table_name: str) -> N BigqueryTableIdentifier.from_string_name(full_table_name).get_table_name() == datahub_full_table_name ) + + +def test_default_config_for_excluding_projects_and_datasets(): + config = BigQueryV2Config.parse_obj({}) + assert config.exclude_empty_projects is False + config = BigQueryV2Config.parse_obj({"exclude_empty_projects": True}) + assert config.exclude_empty_projects + + +@patch.object(BigQueryConnectionConfig, "get_bigquery_client", new=lambda self: None) +@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +def test_excluding_empty_projects_from_ingestion( + get_datasets_for_project_id_mock, +): + project_id_with_datasets = "project-id-with-datasets" + project_id_without_datasets = "project-id-without-datasets" + + def get_datasets_for_project_id_side_effect( + project_id: str, + ) -> List[BigqueryDataset]: + return ( + [] + if project_id == project_id_without_datasets + else [BigqueryDataset("some-dataset")] + ) + + get_datasets_for_project_id_mock.side_effect = ( + get_datasets_for_project_id_side_effect + ) + + base_config = { + "project_ids": [project_id_with_datasets, project_id_without_datasets], + "schema_pattern": AllowDenyPattern(deny=[".*"]), + "include_usage_statistics": False, + "include_table_lineage": False, + } + + config = BigQueryV2Config.parse_obj(base_config) + source = BigqueryV2Source(config=config, 
ctx=PipelineContext(run_id="test-1")) + assert len({wu.metadata.entityUrn for wu in source.get_workunits()}) == 2 # type: ignore + + config = BigQueryV2Config.parse_obj({**base_config, "exclude_empty_projects": True}) + source = BigqueryV2Source(config=config, ctx=PipelineContext(run_id="test-2")) + assert len({wu.metadata.entityUrn for wu in source.get_workunits()}) == 1 # type: ignore From d3346a04e486fa098129b626e61013cab4f69350 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 4 Oct 2023 10:22:45 -0400 Subject: [PATCH 085/156] feat(ingest/unity): Ingest notebooks and their lineage (#8940) --- .../sources/databricks/unity-catalog_pre.md | 1 + metadata-ingestion/setup.py | 2 +- .../src/datahub/emitter/mcp_builder.py | 12 ++ .../ingestion/source/common/subtypes.py | 3 + .../datahub/ingestion/source/unity/config.py | 9 +- .../datahub/ingestion/source/unity/proxy.py | 89 +++++++---- .../ingestion/source/unity/proxy_types.py | 45 +++++- .../datahub/ingestion/source/unity/report.py | 8 +- .../datahub/ingestion/source/unity/source.py | 148 ++++++++++++++---- .../datahub/ingestion/source/unity/usage.py | 12 +- 10 files changed, 257 insertions(+), 72 deletions(-) diff --git a/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md b/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md index 2be8846b87bea..ae2883343d7e8 100644 --- a/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md +++ b/metadata-ingestion/docs/sources/databricks/unity-catalog_pre.md @@ -13,6 +13,7 @@ * Ownership of or `SELECT` privilege on any tables and views you want to ingest * [Ownership documentation](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/ownership.html) * [Privileges documentation](https://docs.databricks.com/data-governance/unity-catalog/manage-privileges/privileges.html) + + To ingest your workspace's notebooks and respective lineage, your service principal must have `CAN_READ` privileges on the folders containing the notebooks you want to ingest: [guide](https://docs.databricks.com/en/security/auth-authz/access-control/workspace-acl.html#folder-permissions). + To `include_usage_statistics` (enabled by default), your service principal must have `CAN_MANAGE` permissions on any SQL Warehouses you want to ingest: [guide](https://docs.databricks.com/security/auth-authz/access-control/sql-endpoint-acl.html). + To ingest `profiling` information with `call_analyze` (enabled by default), your service principal must have ownership or `MODIFY` privilege on any tables you want to profile. * Alternatively, you can run [ANALYZE TABLE](https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-analyze-table.html) yourself on any tables you want to profile, then set `call_analyze` to `false`. 
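Editor's note: the notebook permission called out in the doc above pairs with the opt-in `include_notebooks` flag this commit adds to `unity/config.py` below. A minimal sketch of enabling it (not part of the patch; the workspace URL and token are placeholders):

```python
from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig

# Notebook ingestion is off by default; when enabled, the source also collects
# notebook <-> table lineage alongside the existing table lineage.
config = UnityCatalogSourceConfig.parse_obj(
    {
        "workspace_url": "https://my-workspace.cloud.databricks.com",  # placeholder
        "token": "<databricks-pat>",  # placeholder
        "include_notebooks": True,
    }
)
assert config.include_notebooks
```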
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 34afa8cdb39a4..fe8e3be4632c4 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -250,7 +250,7 @@ databricks = { # 0.1.11 appears to have authentication issues with azure databricks - "databricks-sdk>=0.1.1, != 0.1.11", + "databricks-sdk>=0.9.0", "pyspark", "requests", } diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 844a29f1c78a3..7419577b367aa 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -9,6 +9,7 @@ make_container_urn, make_data_platform_urn, make_dataplatform_instance_urn, + make_dataset_urn_with_platform_instance, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -125,6 +126,17 @@ class BucketKey(ContainerKey): bucket_name: str +class NotebookKey(DatahubKey): + notebook_id: int + platform: str + instance: Optional[str] + + def as_urn(self) -> str: + return make_dataset_urn_with_platform_instance( + platform=self.platform, platform_instance=self.instance, name=self.guid() + ) + + class DatahubKeyJSONEncoder(json.JSONEncoder): # overload method default def default(self, obj: Any) -> Any: diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index a2d89d26112f4..741b4789bef21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -16,6 +16,9 @@ class DatasetSubTypes(str, Enum): SALESFORCE_STANDARD_OBJECT = "Object" POWERBI_DATASET_TABLE = "PowerBI Dataset Table" + # TODO: Create separate entity... + NOTEBOOK = "Notebook" + class DatasetContainerSubTypes(str, Enum): # Generic SubTypes diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 94ff755e3b254..a49c789a82f27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -127,11 +127,16 @@ class UnityCatalogSourceConfig( description='Attach domains to catalogs, schemas or tables during ingestion using regex patterns. Domain key can be a guid like *urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba* or a string like "Marketing".) If you provide strings, then datahub will attempt to resolve this name to a guid, and will error out if this fails. There can be multiple domain keys specified.', ) - include_table_lineage: Optional[bool] = pydantic.Field( + include_table_lineage: bool = pydantic.Field( default=True, description="Option to enable/disable lineage generation.", ) + include_notebooks: bool = pydantic.Field( + default=False, + description="Ingest notebooks, represented as DataHub datasets.", + ) + include_ownership: bool = pydantic.Field( default=False, description="Option to enable/disable ownership generation for metastores, catalogs, schemas, and tables.", @@ -141,7 +146,7 @@ class UnityCatalogSourceConfig( "include_table_ownership", "include_ownership" ) - include_column_lineage: Optional[bool] = pydantic.Field( + include_column_lineage: bool = pydantic.Field( default=True, description="Option to enable/disable lineage generation. 
Currently we have to call a rest call per column to get column level lineage due to the Databrick api which can slow down ingestion. ", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index e92f4ff07b1ad..2401f1c3d163c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -23,6 +23,7 @@ QueryStatementType, QueryStatus, ) +from databricks.sdk.service.workspace import ObjectType import datahub from datahub.ingestion.source.unity.proxy_profiling import ( @@ -33,6 +34,7 @@ Catalog, Column, Metastore, + Notebook, Query, Schema, ServicePrincipal, @@ -137,6 +139,21 @@ def service_principals(self) -> Iterable[ServicePrincipal]: for principal in self._workspace_client.service_principals.list(): yield self._create_service_principal(principal) + def workspace_notebooks(self) -> Iterable[Notebook]: + for obj in self._workspace_client.workspace.list("/", recursive=True): + if obj.object_type == ObjectType.NOTEBOOK: + yield Notebook( + id=obj.object_id, + path=obj.path, + language=obj.language, + created_at=datetime.fromtimestamp( + obj.created_at / 1000, tz=timezone.utc + ), + modified_at=datetime.fromtimestamp( + obj.modified_at / 1000, tz=timezone.utc + ), + ) + def query_history( self, start_time: datetime, @@ -153,7 +170,7 @@ def query_history( "start_time_ms": start_time.timestamp() * 1000, "end_time_ms": end_time.timestamp() * 1000, }, - "statuses": [QueryStatus.FINISHED.value], + "statuses": [QueryStatus.FINISHED], "statement_types": [typ.value for typ in ALLOWED_STATEMENT_TYPES], } ) @@ -196,61 +213,75 @@ def _query_history( method, path, body={**body, "page_token": response["next_page_token"]} ) - def list_lineages_by_table(self, table_name: str) -> dict: + def list_lineages_by_table( + self, table_name: str, include_entity_lineage: bool + ) -> dict: """List table lineage by table name.""" return self._workspace_client.api_client.do( method="GET", - path="/api/2.0/lineage-tracking/table-lineage/get", - body={"table_name": table_name}, + path="/api/2.0/lineage-tracking/table-lineage", + body={ + "table_name": table_name, + "include_entity_lineage": include_entity_lineage, + }, ) def list_lineages_by_column(self, table_name: str, column_name: str) -> dict: """List column lineage by table name and column name.""" return self._workspace_client.api_client.do( "GET", - "/api/2.0/lineage-tracking/column-lineage/get", + "/api/2.0/lineage-tracking/column-lineage", body={"table_name": table_name, "column_name": column_name}, ) - def table_lineage(self, table: Table) -> None: + def table_lineage( + self, table: Table, include_entity_lineage: bool + ) -> Optional[dict]: # Lineage endpoint doesn't exists on 2.1 version try: response: dict = self.list_lineages_by_table( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}" + table_name=table.ref.qualified_table_name, + include_entity_lineage=include_entity_lineage, ) - table.upstreams = { - TableReference( - table.schema.catalog.metastore.id, - item["catalog_name"], - item["schema_name"], - item["name"], - ): {} - for item in response.get("upstream_tables", []) - } + + for item in response.get("upstreams") or []: + if "tableInfo" in item: + table_ref = TableReference.create_from_lineage( + item["tableInfo"], table.schema.catalog.metastore.id + ) + if table_ref: + table.upstreams[table_ref] = {} + for notebook in item.get("notebookInfos") or []: + 
table.upstream_notebooks.add(notebook["notebook_id"]) + + for item in response.get("downstreams") or []: + for notebook in item.get("notebookInfos") or []: + table.downstream_notebooks.add(notebook["notebook_id"]) + + return response except Exception as e: logger.error(f"Error getting lineage: {e}") + return None - def get_column_lineage(self, table: Table) -> None: + def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None: try: - table_lineage_response: dict = self.list_lineages_by_table( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}" + table_lineage = self.table_lineage( + table, include_entity_lineage=include_entity_lineage ) - if table_lineage_response: + if table_lineage: for column in table.columns: response: dict = self.list_lineages_by_column( - table_name=f"{table.schema.catalog.name}.{table.schema.name}.{table.name}", + table_name=table.ref.qualified_table_name, column_name=column.name, ) for item in response.get("upstream_cols", []): - table_ref = TableReference( - table.schema.catalog.metastore.id, - item["catalog_name"], - item["schema_name"], - item["table_name"], + table_ref = TableReference.create_from_lineage( + item, table.schema.catalog.metastore.id ) - table.upstreams.setdefault(table_ref, {}).setdefault( - column.name, [] - ).append(item["name"]) + if table_ref: + table.upstreams.setdefault(table_ref, {}).setdefault( + column.name, [] + ).append(item["name"]) except Exception as e: logger.error(f"Error getting lineage: {e}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 2b943d8c98e7d..d57f20245913f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -1,8 +1,10 @@ # Supported types are available at # https://api-docs.databricks.com/rest/latest/unity-catalog-api-specification-2-1.html?_ga=2.151019001.1795147704.1666247755-2119235717.1666247755 +import dataclasses +import logging from dataclasses import dataclass, field from datetime import datetime -from typing import Dict, List, Optional +from typing import Dict, FrozenSet, List, Optional, Set from databricks.sdk.service.catalog import ( CatalogType, @@ -11,6 +13,7 @@ TableType, ) from databricks.sdk.service.sql import QueryStatementType +from databricks.sdk.service.workspace import Language from datahub.metadata.schema_classes import ( ArrayTypeClass, @@ -26,6 +29,8 @@ TimeTypeClass, ) +logger = logging.getLogger(__name__) + DATA_TYPE_REGISTRY: dict = { ColumnTypeName.BOOLEAN: BooleanTypeClass, ColumnTypeName.BYTE: BytesTypeClass, @@ -66,6 +71,9 @@ ALLOWED_STATEMENT_TYPES = {*OPERATION_STATEMENT_TYPES.keys(), QueryStatementType.SELECT} +NotebookId = int + + @dataclass class CommonProperty: id: str @@ -136,6 +144,19 @@ def create(cls, table: "Table") -> "TableReference": table.name, ) + @classmethod + def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReference"]: + try: + return cls( + metastore, + d["catalog_name"], + d["schema_name"], + d.get("table_name", d["name"]), # column vs table query output + ) + except Exception as e: + logger.warning(f"Failed to create TableReference from {d}: {e}") + return None + def __str__(self) -> str: return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" @@ -166,6 +187,8 @@ class Table(CommonProperty): view_definition: Optional[str] properties: Dict[str, str] upstreams: 
Dict[TableReference, Dict[str, List[str]]] = field(default_factory=dict) + upstream_notebooks: Set[NotebookId] = field(default_factory=set) + downstream_notebooks: Set[NotebookId] = field(default_factory=set) ref: TableReference = field(init=False) @@ -228,3 +251,23 @@ def __bool__(self): self.max is not None, ) ) + + +@dataclass +class Notebook: + id: NotebookId + path: str + language: Language + created_at: datetime + modified_at: datetime + + upstreams: FrozenSet[TableReference] = field(default_factory=frozenset) + + @classmethod + def add_upstream(cls, upstream: TableReference, notebook: "Notebook") -> "Notebook": + return cls( + **{ # type: ignore + **dataclasses.asdict(notebook), + "upstreams": frozenset([*notebook.upstreams, upstream]), + } + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 8382b31a56add..808172a136bb3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -5,21 +5,23 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, ) +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.utilities.lossy_collections import LossyDict, LossyList @dataclass -class UnityCatalogReport(StaleEntityRemovalSourceReport): +class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): metastores: EntityFilterReport = EntityFilterReport.field(type="metastore") catalogs: EntityFilterReport = EntityFilterReport.field(type="catalog") schemas: EntityFilterReport = EntityFilterReport.field(type="schema") tables: EntityFilterReport = EntityFilterReport.field(type="table/view") table_profiles: EntityFilterReport = EntityFilterReport.field(type="table profile") + notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") num_queries: int = 0 num_queries_dropped_parse_failure: int = 0 - num_queries_dropped_missing_table: int = 0 # Can be due to pattern filter - num_queries_dropped_duplicate_table: int = 0 + num_queries_missing_table: int = 0 # Can be due to pattern filter + num_queries_duplicate_table: int = 0 num_queries_parsed_by_spark_plan: int = 0 # Distinguish from Operations emitted for created / updated timestamps diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 493acb939c3bb..f2da1aece9fd4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -2,7 +2,7 @@ import re import time from datetime import timedelta -from typing import Dict, Iterable, List, Optional, Set +from typing import Dict, Iterable, List, Optional, Set, Union from urllib.parse import urljoin from datahub.emitter.mce_builder import ( @@ -18,6 +18,7 @@ CatalogKey, ContainerKey, MetastoreKey, + NotebookKey, UnitySchemaKey, add_dataset_to_container, gen_containers, @@ -56,6 +57,8 @@ Catalog, Column, Metastore, + Notebook, + NotebookId, Schema, ServicePrincipal, Table, @@ -69,6 +72,7 @@ ViewProperties, ) from datahub.metadata.schema_classes import ( + BrowsePathsClass, DataPlatformInstanceClass, DatasetLineageTypeClass, DatasetPropertiesClass, @@ -88,6 +92,7 @@ UpstreamClass, UpstreamLineageClass, ) +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.hive_schema_to_avro import 
get_schema_fields_for_hive_column from datahub.utilities.registries.domain_registry import DomainRegistry @@ -157,6 +162,7 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): # Global set of table refs self.table_refs: Set[TableReference] = set() self.view_refs: Set[TableReference] = set() + self.notebooks: FileBackedDict[Notebook] = FileBackedDict() @staticmethod def test_connection(config_dict: dict) -> TestConnectionReport: @@ -176,6 +182,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + self.report.report_ingestion_stage_start("Start warehouse") wait_on_warehouse = None if self.config.is_profiling_enabled(): # Can take several minutes, so start now and wait later @@ -187,10 +194,23 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) return + self.report.report_ingestion_stage_start("Ingest service principals") self.build_service_principal_map() + if self.config.include_notebooks: + self.report.report_ingestion_stage_start("Ingest notebooks") + yield from self.process_notebooks() + yield from self.process_metastores() + if self.config.include_notebooks: + self.report.report_ingestion_stage_start("Notebook lineage") + for notebook in self.notebooks.values(): + wu = self._gen_notebook_lineage(notebook) + if wu: + yield wu + if self.config.include_usage_statistics: + self.report.report_ingestion_stage_start("Ingest usage") usage_extractor = UnityCatalogUsageExtractor( config=self.config, report=self.report, @@ -203,6 +223,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) if self.config.is_profiling_enabled(): + self.report.report_ingestion_stage_start("Wait on warehouse") assert wait_on_warehouse timeout = timedelta(seconds=self.config.profiling.max_wait_secs) wait_on_warehouse.result(timeout) @@ -212,6 +233,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.unity_catalog_api_proxy, self.gen_dataset_urn, ) + self.report.report_ingestion_stage_start("Profiling") yield from profiling_extractor.get_workunits(self.table_refs) def build_service_principal_map(self) -> None: @@ -223,6 +245,56 @@ def build_service_principal_map(self) -> None: "service-principals", f"Unable to fetch service principals: {e}" ) + def process_notebooks(self) -> Iterable[MetadataWorkUnit]: + for notebook in self.unity_catalog_api_proxy.workspace_notebooks(): + self.notebooks[str(notebook.id)] = notebook + yield from self._gen_notebook_aspects(notebook) + + def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit]: + mcps = MetadataChangeProposalWrapper.construct_many( + entityUrn=self.gen_notebook_urn(notebook), + aspects=[ + DatasetPropertiesClass( + name=notebook.path.rsplit("/", 1)[-1], + customProperties={ + "path": notebook.path, + "language": notebook.language.value, + }, + externalUrl=urljoin( + self.config.workspace_url, f"#notebook/{notebook.id}" + ), + created=TimeStampClass(int(notebook.created_at.timestamp() * 1000)), + lastModified=TimeStampClass( + int(notebook.modified_at.timestamp() * 1000) + ), + ), + SubTypesClass(typeNames=[DatasetSubTypes.NOTEBOOK]), + BrowsePathsClass(paths=notebook.path.split("/")), + # TODO: Add DPI aspect + ], + ) + for mcp in mcps: + yield mcp.as_workunit() + + self.report.notebooks.processed(notebook.path) + + def _gen_notebook_lineage(self, notebook: Notebook) -> Optional[MetadataWorkUnit]: + if not notebook.upstreams: + return None + + return 
MetadataChangeProposalWrapper( + entityUrn=self.gen_notebook_urn(notebook), + aspect=UpstreamLineageClass( + upstreams=[ + UpstreamClass( + dataset=self.gen_dataset_urn(upstream_ref), + type=DatasetLineageTypeClass.COPY, + ) + for upstream_ref in notebook.upstreams + ] + ), + ).as_workunit() + def process_metastores(self) -> Iterable[MetadataWorkUnit]: metastore = self.unity_catalog_api_proxy.assigned_metastore() yield from self.gen_metastore_containers(metastore) @@ -247,6 +319,7 @@ def process_schemas(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]: self.report.schemas.dropped(schema.id) continue + self.report.report_ingestion_stage_start(f"Ingest schema {schema.id}") yield from self.gen_schema_containers(schema) yield from self.process_tables(schema) @@ -282,13 +355,21 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ownership = self._create_table_ownership_aspect(table) data_platform_instance = self._create_data_platform_instance_aspect(table) - lineage: Optional[UpstreamLineageClass] = None if self.config.include_column_lineage: - self.unity_catalog_api_proxy.get_column_lineage(table) - lineage = self._generate_column_lineage_aspect(dataset_urn, table) + self.unity_catalog_api_proxy.get_column_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) elif self.config.include_table_lineage: - self.unity_catalog_api_proxy.table_lineage(table) - lineage = self._generate_lineage_aspect(dataset_urn, table) + self.unity_catalog_api_proxy.table_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) + lineage = self._generate_lineage_aspect(dataset_urn, table) + + if self.config.include_notebooks: + for notebook_id in table.downstream_notebooks: + self.notebooks[str(notebook_id)] = Notebook.add_upstream( + table.ref, self.notebooks[str(notebook_id)] + ) yield from [ mcp.as_workunit() @@ -308,7 +389,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ) ] - def _generate_column_lineage_aspect( + def _generate_lineage_aspect( self, dataset_urn: str, table: Table ) -> Optional[UpstreamLineageClass]: upstreams: List[UpstreamClass] = [] @@ -318,6 +399,7 @@ def _generate_column_lineage_aspect( ): upstream_urn = self.gen_dataset_urn(upstream_ref) + # Should be empty if config.include_column_lineage is False finegrained_lineages.extend( FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, @@ -331,38 +413,28 @@ def _generate_column_lineage_aspect( for d_col, u_cols in sorted(downstream_to_upstream_cols.items()) ) - upstream_table = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, - ) - upstreams.append(upstream_table) - - if upstreams: - return UpstreamLineageClass( - upstreams=upstreams, fineGrainedLineages=finegrained_lineages - ) - else: - return None - - def _generate_lineage_aspect( - self, dataset_urn: str, table: Table - ) -> Optional[UpstreamLineageClass]: - upstreams: List[UpstreamClass] = [] - for upstream in sorted(table.upstreams.keys()): - upstream_urn = make_dataset_urn_with_platform_instance( - self.platform, - f"{table.schema.catalog.metastore.id}.{upstream}", - self.platform_instance_name, + upstreams.append( + UpstreamClass( + dataset=upstream_urn, + type=DatasetLineageTypeClass.TRANSFORMED, + ) ) - upstream_table = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, + for notebook in table.upstream_notebooks: + upstreams.append( + UpstreamClass( + dataset=self.gen_notebook_urn(notebook), + 
type=DatasetLineageTypeClass.TRANSFORMED, + ) ) - upstreams.append(upstream_table) if upstreams: - return UpstreamLineageClass(upstreams=upstreams) + return UpstreamLineageClass( + upstreams=upstreams, + fineGrainedLineages=finegrained_lineages + if self.config.include_column_lineage + else None, + ) else: return None @@ -389,6 +461,14 @@ def gen_dataset_urn(self, table_ref: TableReference) -> str: name=str(table_ref), ) + def gen_notebook_urn(self, notebook: Union[Notebook, NotebookId]) -> str: + notebook_id = notebook.id if isinstance(notebook, Notebook) else notebook + return NotebookKey( + notebook_id=notebook_id, + platform=self.platform, + instance=self.config.platform_instance, + ).as_urn() + def gen_schema_containers(self, schema: Schema) -> Iterable[MetadataWorkUnit]: domain_urn = self._gen_domain_urn(f"{schema.catalog.name}.{schema.name}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index 49f56b46fb012..ab21c1a318659 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -214,12 +214,15 @@ def _resolve_tables( self, tables: List[str], table_map: TableMap ) -> List[TableReference]: """Resolve tables to TableReferences, filtering out unrecognized or unresolvable table names.""" + + missing_table = False + duplicate_table = False output = [] for table in tables: table = str(table) if table not in table_map: logger.debug(f"Dropping query with unrecognized table: {table}") - self.report.num_queries_dropped_missing_table += 1 + missing_table = True else: refs = table_map[table] if len(refs) == 1: @@ -228,6 +231,11 @@ def _resolve_tables( logger.warning( f"Could not resolve table ref for {table}: {len(refs)} duplicates." 
) - self.report.num_queries_dropped_duplicate_table += 1 + duplicate_table = True + + if missing_table: + self.report.num_queries_missing_table += 1 + if duplicate_table: + self.report.num_queries_duplicate_table += 1 return output From 301d3e6b1ccffaf946f128766578faddbc7ac44e Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 4 Oct 2023 10:23:13 -0400 Subject: [PATCH 086/156] test(ingest/unity): Add Unity Catalog memory performance testing (#8932) --- .../ingestion/source/unity/proxy_types.py | 1 - .../tests/performance/bigquery/__init__.py | 0 .../bigquery_events.py} | 0 .../{ => bigquery}/test_bigquery_usage.py | 22 +-- .../tests/performance/data_generation.py | 53 ++++- .../tests/performance/data_model.py | 31 ++- .../tests/performance/databricks/__init__.py | 0 .../performance/databricks/test_unity.py | 71 +++++++ .../databricks/unity_proxy_mock.py | 183 ++++++++++++++++++ .../tests/performance/helpers.py | 21 ++ .../tests/unit/test_bigquery_usage.py | 7 +- 11 files changed, 356 insertions(+), 33 deletions(-) create mode 100644 metadata-ingestion/tests/performance/bigquery/__init__.py rename metadata-ingestion/tests/performance/{bigquery.py => bigquery/bigquery_events.py} (100%) rename metadata-ingestion/tests/performance/{ => bigquery}/test_bigquery_usage.py (80%) create mode 100644 metadata-ingestion/tests/performance/databricks/__init__.py create mode 100644 metadata-ingestion/tests/performance/databricks/test_unity.py create mode 100644 metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py create mode 100644 metadata-ingestion/tests/performance/helpers.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index d57f20245913f..54ac2e90d7c7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -175,7 +175,6 @@ class Table(CommonProperty): columns: List[Column] storage_location: Optional[str] data_source_format: Optional[DataSourceFormat] - comment: Optional[str] table_type: TableType owner: Optional[str] generation: Optional[int] diff --git a/metadata-ingestion/tests/performance/bigquery/__init__.py b/metadata-ingestion/tests/performance/bigquery/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/performance/bigquery.py b/metadata-ingestion/tests/performance/bigquery/bigquery_events.py similarity index 100% rename from metadata-ingestion/tests/performance/bigquery.py rename to metadata-ingestion/tests/performance/bigquery/bigquery_events.py diff --git a/metadata-ingestion/tests/performance/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py similarity index 80% rename from metadata-ingestion/tests/performance/test_bigquery_usage.py rename to metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index 7e05ef070b45d..bbc3378450bff 100644 --- a/metadata-ingestion/tests/performance/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -2,13 +2,11 @@ import os import random from datetime import timedelta -from typing import Iterable, Tuple import humanfriendly import psutil from datahub.emitter.mce_builder import make_dataset_urn -from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.bigquery_v2.bigquery_config import ( BigQueryUsageConfig, BigQueryV2Config, @@ -16,12 
+14,13 @@ from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.utilities.perf_timer import PerfTimer -from tests.performance.bigquery import generate_events, ref_from_table +from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import ( NormalDistribution, generate_data, generate_queries, ) +from tests.performance.helpers import workunit_sink def run_test(): @@ -33,7 +32,7 @@ def run_test(): num_views=2000, time_range=timedelta(days=7), ) - all_tables = seed_metadata.tables + seed_metadata.views + all_tables = seed_metadata.all_tables config = BigQueryV2Config( start_time=seed_metadata.start_time, @@ -88,21 +87,6 @@ def run_test(): print(f"Hash collisions: {report.num_usage_query_hash_collisions}") -def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]: - peak_memory_usage = psutil.Process(os.getpid()).memory_info().rss - i: int = 0 - for i, wu in enumerate(workunits): - if i % 10_000 == 0: - peak_memory_usage = max( - peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss - ) - peak_memory_usage = max( - peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss - ) - - return i, peak_memory_usage - - if __name__ == "__main__": root_logger = logging.getLogger() root_logger.setLevel(logging.INFO) diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index c530848f27f5c..67b156896909a 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -11,11 +11,14 @@ import uuid from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from typing import Iterable, List, TypeVar +from typing import Iterable, List, TypeVar, Union, cast from faker import Faker from tests.performance.data_model import ( + Column, + ColumnMapping, + ColumnType, Container, FieldAccess, Query, @@ -52,15 +55,21 @@ def sample_with_floor(self, floor: int = 1) -> int: @dataclass class SeedMetadata: - containers: List[Container] + # Each list is a layer of containers, e.g. 
[[databases], [schemas]] + containers: List[List[Container]] + tables: List[Table] views: List[View] start_time: datetime end_time: datetime + @property + def all_tables(self) -> List[Table]: + return self.tables + cast(List[Table], self.views) + def generate_data( - num_containers: int, + num_containers: Union[List[int], int], num_tables: int, num_views: int, columns_per_table: NormalDistribution = NormalDistribution(5, 2), @@ -68,32 +77,52 @@ def generate_data( view_definition_length: NormalDistribution = NormalDistribution(150, 50), time_range: timedelta = timedelta(days=14), ) -> SeedMetadata: - containers = [Container(f"container-{i}") for i in range(num_containers)] + # Assemble containers + if isinstance(num_containers, int): + num_containers = [num_containers] + + containers: List[List[Container]] = [] + for i, num_in_layer in enumerate(num_containers): + layer = [ + Container( + f"{i}-container-{j}", + parent=random.choice(containers[-1]) if containers else None, + ) + for j in range(num_in_layer) + ] + containers.append(layer) + + # Assemble tables tables = [ Table( f"table-{i}", - container=random.choice(containers), + container=random.choice(containers[-1]), columns=[ f"column-{j}-{uuid.uuid4()}" for j in range(columns_per_table.sample_with_floor()) ], + column_mapping=None, ) for i in range(num_tables) ] views = [ View( f"view-{i}", - container=random.choice(containers), + container=random.choice(containers[-1]), columns=[ f"column-{j}-{uuid.uuid4()}" for j in range(columns_per_table.sample_with_floor()) ], + column_mapping=None, definition=f"{uuid.uuid4()}-{'*' * view_definition_length.sample_with_floor(10)}", parents=random.sample(tables, parents_per_view.sample_with_floor()), ) for i in range(num_views) ] + for table in tables + views: + _generate_column_mapping(table) + now = datetime.now(tz=timezone.utc) return SeedMetadata( containers=containers, @@ -162,6 +191,18 @@ def generate_queries( ) +def _generate_column_mapping(table: Table) -> ColumnMapping: + d = {} + for column in table.columns: + d[column] = Column( + name=column, + type=random.choice(list(ColumnType)), + nullable=random.random() < 0.1, # Fixed 10% chance for now + ) + table.column_mapping = d + return d + + def _sample_list(lst: List[T], dist: NormalDistribution, floor: int = 1) -> List[T]: return random.sample(lst, min(dist.sample_with_floor(floor), len(lst))) diff --git a/metadata-ingestion/tests/performance/data_model.py b/metadata-ingestion/tests/performance/data_model.py index c593e69ceb9a7..9425fa827070e 100644 --- a/metadata-ingestion/tests/performance/data_model.py +++ b/metadata-ingestion/tests/performance/data_model.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from datetime import datetime -from typing import List, Optional +from enum import Enum +from typing import Dict, List, Optional from typing_extensions import Literal -Column = str StatementType = Literal[ # SELECT + values from OperationTypeClass "SELECT", "INSERT", @@ -21,13 +21,36 @@ @dataclass class Container: name: str + parent: Optional["Container"] = None + + +class ColumnType(str, Enum): + # Can add types that take parameters in the future + + INTEGER = "INTEGER" + FLOAT = "FLOAT" # Double precision (64 bit) + STRING = "STRING" + BOOLEAN = "BOOLEAN" + DATETIME = "DATETIME" + + +@dataclass +class Column: + name: str + type: ColumnType + nullable: bool + + +ColumnRef = str +ColumnMapping = Dict[ColumnRef, Column] @dataclass class Table: name: str container: Container - columns: List[Column] + columns: List[ColumnRef] + 
column_mapping: Optional[ColumnMapping] def is_view(self) -> bool: return False @@ -44,7 +67,7 @@ def is_view(self) -> bool: @dataclass class FieldAccess: - column: Column + column: ColumnRef table: Table diff --git a/metadata-ingestion/tests/performance/databricks/__init__.py b/metadata-ingestion/tests/performance/databricks/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py new file mode 100644 index 0000000000000..cc9558f0692ed --- /dev/null +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -0,0 +1,71 @@ +import logging +import os +from unittest.mock import patch + +import humanfriendly +import psutil + +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig +from datahub.ingestion.source.unity.source import UnityCatalogSource +from datahub.utilities.perf_timer import PerfTimer +from tests.performance.data_generation import ( + NormalDistribution, + generate_data, + generate_queries, +) +from tests.performance.databricks.unity_proxy_mock import UnityCatalogApiProxyMock +from tests.performance.helpers import workunit_sink + + +def run_test(): + seed_metadata = generate_data( + num_containers=[1, 100, 5000], + num_tables=50000, + num_views=10000, + columns_per_table=NormalDistribution(100, 50), + parents_per_view=NormalDistribution(5, 5), + view_definition_length=NormalDistribution(1000, 300), + ) + queries = generate_queries( + seed_metadata, + num_selects=100000, + num_operations=100000, + num_unique_queries=10000, + num_users=1000, + ) + proxy_mock = UnityCatalogApiProxyMock( + seed_metadata, queries=queries, num_service_principals=10000 + ) + print("Data generated") + + config = UnityCatalogSourceConfig( + token="", workspace_url="http://localhost:1234", include_usage_statistics=False + ) + ctx = PipelineContext(run_id="test") + with patch( + "datahub.ingestion.source.unity.source.UnityCatalogApiProxy", + lambda *args, **kwargs: proxy_mock, + ): + source: UnityCatalogSource = UnityCatalogSource(ctx, config) + + pre_mem_usage = psutil.Process(os.getpid()).memory_info().rss + print(f"Test data size: {humanfriendly.format_size(pre_mem_usage)}") + + with PerfTimer() as timer: + workunits = source.get_workunits() + num_workunits, peak_memory_usage = workunit_sink(workunits) + print(f"Workunits Generated: {num_workunits}") + print(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + + print( + f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" + ) + print(source.report.aspects) + + +if __name__ == "__main__": + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + root_logger.addHandler(logging.StreamHandler()) + run_test() diff --git a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py new file mode 100644 index 0000000000000..593163e12bf0a --- /dev/null +++ b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py @@ -0,0 +1,183 @@ +import uuid +from collections import defaultdict +from datetime import datetime, timezone +from typing import Dict, Iterable, List + +from databricks.sdk.service.catalog import ColumnTypeName +from databricks.sdk.service.sql import QueryStatementType + +from datahub.ingestion.source.unity.proxy_types import ( + Catalog, + CatalogType, + Column, + 
Metastore, + Query, + Schema, + ServicePrincipal, + Table, + TableType, +) +from tests.performance import data_model +from tests.performance.data_generation import SeedMetadata +from tests.performance.data_model import ColumnType, StatementType + + +class UnityCatalogApiProxyMock: + """Mimics UnityCatalogApiProxy for performance testing.""" + + def __init__( + self, + seed_metadata: SeedMetadata, + queries: Iterable[data_model.Query] = (), + num_service_principals: int = 0, + ) -> None: + self.seed_metadata = seed_metadata + self.queries = queries + self.num_service_principals = num_service_principals + self.warehouse_id = "invalid-warehouse-id" + + # Cache for performance + self._schema_to_table: Dict[str, List[data_model.Table]] = defaultdict(list) + for table in seed_metadata.all_tables: + self._schema_to_table[table.container.name].append(table) + + def check_basic_connectivity(self) -> bool: + return True + + def assigned_metastore(self) -> Metastore: + container = self.seed_metadata.containers[0][0] + return Metastore( + id=container.name, + name=container.name, + global_metastore_id=container.name, + metastore_id=container.name, + comment=None, + owner=None, + cloud=None, + region=None, + ) + + def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + for container in self.seed_metadata.containers[1]: + if not container.parent or metastore.name != container.parent.name: + continue + + yield Catalog( + id=f"{metastore.id}.{container.name}", + name=container.name, + metastore=metastore, + comment=None, + owner=None, + type=CatalogType.MANAGED_CATALOG, + ) + + def schemas(self, catalog: Catalog) -> Iterable[Schema]: + for container in self.seed_metadata.containers[2]: + # Assumes all catalog names are unique + if not container.parent or catalog.name != container.parent.name: + continue + + yield Schema( + id=f"{catalog.id}.{container.name}", + name=container.name, + catalog=catalog, + comment=None, + owner=None, + ) + + def tables(self, schema: Schema) -> Iterable[Table]: + for table in self._schema_to_table[schema.name]: + columns = [] + if table.column_mapping: + for i, col_name in enumerate(table.columns): + column = table.column_mapping[col_name] + columns.append( + Column( + id=column.name, + name=column.name, + type_name=self._convert_column_type(column.type), + type_text=column.type.value, + nullable=column.nullable, + position=i, + comment=None, + type_precision=0, + type_scale=0, + ) + ) + + yield Table( + id=f"{schema.id}.{table.name}", + name=table.name, + schema=schema, + table_type=TableType.VIEW if table.is_view() else TableType.MANAGED, + columns=columns, + created_at=datetime.now(tz=timezone.utc), + comment=None, + owner=None, + storage_location=None, + data_source_format=None, + generation=None, + created_by="", + updated_at=None, + updated_by=None, + table_id="", + view_definition=table.definition + if isinstance(table, data_model.View) + else None, + properties={}, + ) + + def service_principals(self) -> Iterable[ServicePrincipal]: + for i in range(self.num_service_principals): + yield ServicePrincipal( + id=str(i), + application_id=str(uuid.uuid4()), + display_name=f"user-{i}", + active=True, + ) + + def query_history( + self, + start_time: datetime, + end_time: datetime, + ) -> Iterable[Query]: + for i, query in enumerate(self.queries): + yield Query( + query_id=str(i), + query_text=query.text, + statement_type=self._convert_statement_type(query.type), + start_time=query.timestamp, + end_time=query.timestamp, + user_id=hash(query.actor), + 
user_name=query.actor, + executed_as_user_id=hash(query.actor), + executed_as_user_name=None, + ) + + def table_lineage(self, table: Table) -> None: + pass + + def get_column_lineage(self, table: Table) -> None: + pass + + @staticmethod + def _convert_column_type(t: ColumnType) -> ColumnTypeName: + if t == ColumnType.INTEGER: + return ColumnTypeName.INT + elif t == ColumnType.FLOAT: + return ColumnTypeName.DOUBLE + elif t == ColumnType.STRING: + return ColumnTypeName.STRING + elif t == ColumnType.BOOLEAN: + return ColumnTypeName.BOOLEAN + elif t == ColumnType.DATETIME: + return ColumnTypeName.TIMESTAMP + else: + raise ValueError(f"Unknown column type: {t}") + + @staticmethod + def _convert_statement_type(t: StatementType) -> QueryStatementType: + if t == "CUSTOM" or t == "UNKNOWN": + return QueryStatementType.OTHER + else: + return QueryStatementType[t] diff --git a/metadata-ingestion/tests/performance/helpers.py b/metadata-ingestion/tests/performance/helpers.py new file mode 100644 index 0000000000000..eb98e53670c96 --- /dev/null +++ b/metadata-ingestion/tests/performance/helpers.py @@ -0,0 +1,21 @@ +import os +from typing import Iterable, Tuple + +import psutil + +from datahub.ingestion.api.workunit import MetadataWorkUnit + + +def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]: + peak_memory_usage = psutil.Process(os.getpid()).memory_info().rss + i: int = 0 + for i, wu in enumerate(workunits): + if i % 10_000 == 0: + peak_memory_usage = max( + peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss + ) + peak_memory_usage = max( + peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss + ) + + return i, peak_memory_usage diff --git a/metadata-ingestion/tests/unit/test_bigquery_usage.py b/metadata-ingestion/tests/unit/test_bigquery_usage.py index e06c6fb3fe7e5..1eb5d8b00e27c 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_usage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_usage.py @@ -35,7 +35,7 @@ TimeWindowSizeClass, ) from datahub.testing.compare_metadata_json import diff_metadata_json -from tests.performance.bigquery import generate_events, ref_from_table +from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import generate_data, generate_queries from tests.performance.data_model import Container, FieldAccess, Query, Table, View @@ -45,14 +45,15 @@ ACTOR_2, ACTOR_2_URN = "b@acryl.io", "urn:li:corpuser:b" DATABASE_1 = Container("database_1") DATABASE_2 = Container("database_2") -TABLE_1 = Table("table_1", DATABASE_1, ["id", "name", "age"]) -TABLE_2 = Table("table_2", DATABASE_1, ["id", "table_1_id", "value"]) +TABLE_1 = Table("table_1", DATABASE_1, ["id", "name", "age"], None) +TABLE_2 = Table("table_2", DATABASE_1, ["id", "table_1_id", "value"], None) VIEW_1 = View( name="view_1", container=DATABASE_1, columns=["id", "name", "total"], definition="VIEW DEFINITION 1", parents=[TABLE_1, TABLE_2], + column_mapping=None, ) ALL_TABLES = [TABLE_1, TABLE_2, VIEW_1] From 165aa54d1e6f1a1707f79be3cce39ec06c8a1652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 4 Oct 2023 19:24:04 +0200 Subject: [PATCH 087/156] doc: DataHubUpgradeHistory_v1 (#8918) --- docs/deploy/confluent-cloud.md | 5 +++++ docs/how/kafka-config.md | 17 +++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/deploy/confluent-cloud.md b/docs/deploy/confluent-cloud.md index 794b55d4686bf..096fd9984f474 100644 --- 
a/docs/deploy/confluent-cloud.md +++ b/docs/deploy/confluent-cloud.md @@ -16,6 +16,11 @@ First, you'll need to create following new topics in the [Confluent Control Cent 6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages 7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages 8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event +9. **MetadataGraphEvent_v4**: +10. **MetadataGraphEvent_v4**: +11. **PlatformEvent_v1** +12. **DataHubUpgradeHistory_v1**: Notifies the end of the DataHub Upgrade job so dependents can act accordingly (_e.g._, startup). + Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic. The first five are the most important, and are explained in more depth in [MCP/MCL](../advanced/mcp-mcl.md). The final topics are those which are deprecated but still used under certain circumstances. It is likely that in the future they will be completely diff --git a/docs/how/kafka-config.md b/docs/how/kafka-config.md index f3f81c3d07c01..2f20e8b548f83 100644 --- a/docs/how/kafka-config.md +++ b/docs/how/kafka-config.md @@ -52,16 +52,21 @@ Also see [Kafka Connect Security](https://docs.confluent.io/current/connect/secu By default, DataHub relies on the a set of Kafka topics to operate. By default, they have the following names: -- **MetadataChangeProposal_v1** -- **FailedMetadataChangeProposal_v1** -- **MetadataChangeLog_Versioned_v1** -- **MetadataChangeLog_Timeseries_v1** -- **DataHubUsageEvent_v1**: User behavior tracking event for UI +1. **MetadataChangeProposal_v1** +2. **FailedMetadataChangeProposal_v1** +3. **MetadataChangeLog_Versioned_v1** +4. **MetadataChangeLog_Timeseries_v1** +5. **DataHubUsageEvent_v1**: User behavior tracking event for UI 6. (Deprecated) **MetadataChangeEvent_v4**: Metadata change proposal messages 7. (Deprecated) **MetadataAuditEvent_v4**: Metadata change log messages 8. (Deprecated) **FailedMetadataChangeEvent_v4**: Failed to process #1 event +9. **MetadataGraphEvent_v4**: +10. **MetadataGraphEvent_v4**: +11. **PlatformEvent_v1**: +12. **DataHubUpgradeHistory_v1**: Notifies the end of the DataHub Upgrade job so dependents can act accordingly (_e.g._, startup). + Note this topic requires special configuration: **Infinite retention**. Also, 1 partition is enough for the occasional traffic. -These topics are discussed at more length in [Metadata Events](../what/mxe.md). +How Metadata Events relate to these topics is discussed at more length in [Metadata Events](../what/mxe.md). We've included environment variables to customize the name each of these topics, for cases where an organization has naming rules for your topics. From 3a9452c2072c95cbd7a4bf1270b4ef07abd1b1eb Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 5 Oct 2023 03:42:00 +0900 Subject: [PATCH 088/156] fix: fix typo on aws guide (#8944) --- docs/deploy/aws.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index 228fcb51d1a28..e0f57b4a0b0cb 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -100,7 +100,7 @@ eksctl create iamserviceaccount \ Install the TargetGroupBinding custom resource definition by running the following.
``` -kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller//crds?ref=master" +kubectl apply -k "github.com/aws/eks-charts/stable/aws-load-balancer-controller/crds?ref=master" ``` Add the helm chart repository containing the latest version of the ALB controller. From e2afd44bfeb287e8365b99bc7677d06e4172643b Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Wed, 4 Oct 2023 16:38:58 -0400 Subject: [PATCH 089/156] feat(dbt-ingestion): add documentation link from dbt source to institutionalMemory (#8686) Co-authored-by: Ethan Cartwright Co-authored-by: Harshal Sheth --- .../docs/sources/dbt/dbt-cloud_recipe.yml | 8 +-- metadata-ingestion/docs/sources/dbt/dbt.md | 7 ++ .../ingestion/source/dbt/dbt_common.py | 6 ++ .../src/datahub/utilities/mapping.py | 67 ++++++++++++++++++- metadata-ingestion/tests/unit/test_mapping.py | 41 ++++++++++++ 5 files changed, 123 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml b/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml index 113303cfc1ad4..ef0776b189ca9 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml +++ b/metadata-ingestion/docs/sources/dbt/dbt-cloud_recipe.yml @@ -6,14 +6,14 @@ source: # In the URL https://cloud.getdbt.com/next/deploy/107298/projects/175705/jobs/148094, # 107298 is the account_id, 175705 is the project_id, and 148094 is the job_id - account_id: # set to your dbt cloud account id - project_id: # set to your dbt cloud project id - job_id: # set to your dbt cloud job id + account_id: "${DBT_ACCOUNT_ID}" # set to your dbt cloud account id + project_id: "${DBT_PROJECT_ID}" # set to your dbt cloud project id + job_id: "${DBT_JOB_ID}" # set to your dbt cloud job id run_id: # set to your dbt cloud run id. This is optional, and defaults to the latest run target_platform: postgres # Options - target_platform: "my_target_platform_id" # e.g. bigquery/postgres/etc. + target_platform: "${TARGET_PLATFORM_ID}" # e.g. bigquery/postgres/etc. # sink configs diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index bfc3ebd5bb350..43ced13c3b1f8 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -38,6 +38,12 @@ meta_mapping: operation: "add_terms" config: separator: "," + documentation_link: + match: "(?:https?)?\:\/\/\w*[^#]*" + operation: "add_doc_link" + config: + link: {{ $match }} + description: "Documentation Link" column_meta_mapping: terms_list: match: ".*" @@ -57,6 +63,7 @@ We support the following operations: 2. add_term - Requires `term` property in config. 3. add_terms - Accepts an optional `separator` property in config. 4. add_owner - Requires `owner_type` property in config which can be either user or group. Optionally accepts the `owner_category` config property which you can set to one of `['TECHNICAL_OWNER', 'BUSINESS_OWNER', 'DATA_STEWARD', 'DATAOWNER'` (defaults to `DATAOWNER`). +5. add_doc_link - Requires `link` and `description` properties in config. Upon ingestion run, this will overwrite current links in the institutional knowledge section with this new link. The anchor text is defined here in the meta_mappings as `description`. 
Note: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 782d94f39e8a5..3edeb695e9f21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1188,9 +1188,15 @@ def _generate_base_aspects( ): aspects.append(meta_aspects.get(Constants.ADD_TERM_OPERATION)) + # add meta links aspect + meta_links_aspect = meta_aspects.get(Constants.ADD_DOC_LINK_OPERATION) + if meta_links_aspect and self.config.enable_meta_mapping: + aspects.append(meta_links_aspect) + # add schema metadata aspect schema_metadata = self.get_schema_metadata(self.report, node, mce_platform) aspects.append(schema_metadata) + return aspects def get_schema_metadata( diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 793eccfb22c7e..eb2d975ee607f 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -2,12 +2,16 @@ import logging import operator import re +import time from functools import reduce -from typing import Any, Dict, List, Match, Optional, Union +from typing import Any, Dict, List, Match, Optional, Union, cast from datahub.emitter import mce_builder from datahub.emitter.mce_builder import OwnerType from datahub.metadata.schema_classes import ( + AuditStampClass, + InstitutionalMemoryClass, + InstitutionalMemoryMetadataClass, OwnerClass, OwnershipClass, OwnershipSourceClass, @@ -39,6 +43,7 @@ def _insert_match_value(original_value: str, match_value: str) -> str: class Constants: + ADD_DOC_LINK_OPERATION = "add_doc_link" ADD_TAG_OPERATION = "add_tag" ADD_TERM_OPERATION = "add_term" ADD_TERMS_OPERATION = "add_terms" @@ -47,6 +52,8 @@ class Constants: OPERATION_CONFIG = "config" TAG = "tag" TERM = "term" + DOC_LINK = "link" + DOC_DESCRIPTION = "description" OWNER_TYPE = "owner_type" OWNER_CATEGORY = "owner_category" MATCH = "match" @@ -163,7 +170,6 @@ def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: ) operations_value_list.append(operation) # type: ignore operations_map[operation_type] = operations_value_list - aspect_map = self.convert_to_aspects(operations_map) except Exception as e: logger.error(f"Error while processing operation defs over raw_props: {e}") @@ -173,6 +179,7 @@ def convert_to_aspects( self, operation_map: Dict[str, Union[set, list]] ) -> Dict[str, Any]: aspect_map: Dict[str, Any] = {} + if Constants.ADD_TAG_OPERATION in operation_map: tag_aspect = mce_builder.make_global_tag_aspect_with_tag_list( sorted(operation_map[Constants.ADD_TAG_OPERATION]) @@ -195,11 +202,57 @@ def convert_to_aspects( ] ) aspect_map[Constants.ADD_OWNER_OPERATION] = owner_aspect + if Constants.ADD_TERM_OPERATION in operation_map: term_aspect = mce_builder.make_glossary_terms_aspect_from_urn_list( sorted(operation_map[Constants.ADD_TERM_OPERATION]) ) aspect_map[Constants.ADD_TERM_OPERATION] = term_aspect + + if Constants.ADD_DOC_LINK_OPERATION in operation_map: + try: + if len( + operation_map[Constants.ADD_DOC_LINK_OPERATION] + ) == 1 and isinstance( + operation_map[Constants.ADD_DOC_LINK_OPERATION], list + ): + docs_dict = cast( + List[Dict], operation_map[Constants.ADD_DOC_LINK_OPERATION] + )[0] + if "description" not in docs_dict or "link" not in docs_dict: + raise Exception( + "Documentation_link meta_mapping config needs a description key and a link key" + ) + + now = 
int(time.time() * 1000) # milliseconds since epoch + institutional_memory_element = InstitutionalMemoryMetadataClass( + url=docs_dict["link"], + description=docs_dict["description"], + createStamp=AuditStampClass( + time=now, actor="urn:li:corpuser:ingestion" + ), + ) + + # create a new institutional memory aspect + institutional_memory_aspect = InstitutionalMemoryClass( + elements=[institutional_memory_element] + ) + + aspect_map[ + Constants.ADD_DOC_LINK_OPERATION + ] = institutional_memory_aspect + else: + raise Exception( + f"Expected 1 item of type list for the documentation_link meta_mapping config," + f" received type of {type(operation_map[Constants.ADD_DOC_LINK_OPERATION])}" + f", and size of {len(operation_map[Constants.ADD_DOC_LINK_OPERATION])}." + ) + + except Exception as e: + logger.error( + f"Error while constructing aspect for documentation link and description : {e}" + ) + return aspect_map def get_operation_value( @@ -248,6 +301,16 @@ def get_operation_value( term = operation_config[Constants.TERM] term = _insert_match_value(term, _get_best_match(match, "term")) return mce_builder.make_term_urn(term) + elif ( + operation_type == Constants.ADD_DOC_LINK_OPERATION + and operation_config[Constants.DOC_LINK] + and operation_config[Constants.DOC_DESCRIPTION] + ): + link = operation_config[Constants.DOC_LINK] + link = _insert_match_value(link, _get_best_match(match, "link")) + description = operation_config[Constants.DOC_DESCRIPTION] + return {"link": link, "description": description} + elif operation_type == Constants.ADD_TERMS_OPERATION: separator = operation_config.get(Constants.SEPARATOR, ",") captured_terms = match.group(0) diff --git a/metadata-ingestion/tests/unit/test_mapping.py b/metadata-ingestion/tests/unit/test_mapping.py index d69dd4a8a96b0..5c258f16535f8 100644 --- a/metadata-ingestion/tests/unit/test_mapping.py +++ b/metadata-ingestion/tests/unit/test_mapping.py @@ -4,6 +4,7 @@ from datahub.metadata.schema_classes import ( GlobalTagsClass, GlossaryTermsClass, + InstitutionalMemoryClass, OwnerClass, OwnershipClass, OwnershipSourceTypeClass, @@ -233,6 +234,46 @@ def test_operation_processor_advanced_matching_tags(): assert tag_aspect.tags[0].tag == "urn:li:tag:case_4567" +def test_operation_processor_institutional_memory(): + raw_props = { + "documentation_link": "https://test.com/documentation#ignore-this", + } + processor = OperationProcessor( + operation_defs={ + "documentation_link": { + "match": r"(?:https?)?\:\/\/\w*[^#]*", + "operation": "add_doc_link", + "config": {"link": "{{ $match }}", "description": "test"}, + }, + }, + ) + aspect_map = processor.process(raw_props) + assert "add_doc_link" in aspect_map + + doc_link_aspect: InstitutionalMemoryClass = aspect_map["add_doc_link"] + + assert doc_link_aspect.elements[0].url == "https://test.com/documentation" + assert doc_link_aspect.elements[0].description == "test" + + +def test_operation_processor_institutional_memory_no_description(): + raw_props = { + "documentation_link": "test.com/documentation#ignore-this", + } + processor = OperationProcessor( + operation_defs={ + "documentation_link": { + "match": r"(?:https?)?\:\/\/\w*[^#]*", + "operation": "add_doc_link", + "config": {"link": "{{ $match }}"}, + }, + }, + ) + # we require a description, so this should stay empty + aspect_map = processor.process(raw_props) + assert aspect_map == {} + + def test_operation_processor_matching_nested_props(): raw_props = { "gdpr": { From 0f8d2757352597ceaed62b93547381255dbc650e Mon Sep 17 00:00:00 2001 From: John Joyce 
Date: Wed, 4 Oct 2023 20:03:40 -0700 Subject: [PATCH 090/156] refactor(style): Improve search bar input focus + styling (#8955) --- .../src/app/search/SearchBar.tsx | 46 +++++++++++-------- .../src/app/shared/admin/HeaderLinks.tsx | 28 +++++------ .../src/conf/theme/theme_dark.config.json | 4 +- .../src/conf/theme/theme_light.config.json | 4 +- 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index fb10e1ca0026e..b4699994bc460 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -6,7 +6,7 @@ import { useHistory } from 'react-router'; import { AutoCompleteResultForEntity, EntityType, FacetFilterInput, ScenarioType } from '../../types.generated'; import EntityRegistry from '../entity/EntityRegistry'; import filterSearchQuery from './utils/filterSearchQuery'; -import { ANTD_GRAY, ANTD_GRAY_V2 } from '../entity/shared/constants'; +import { ANTD_GRAY, ANTD_GRAY_V2, REDESIGN_COLORS } from '../entity/shared/constants'; import { getEntityPath } from '../entity/shared/containers/profile/utils'; import { EXACT_SEARCH_PREFIX } from './utils/constants'; import { useListRecommendationsQuery } from '../../graphql/recommendations.generated'; @@ -20,7 +20,6 @@ import RecommendedOption from './autoComplete/RecommendedOption'; import SectionHeader, { EntityTypeLabel } from './autoComplete/SectionHeader'; import { useUserContext } from '../context/useUserContext'; import { navigateToSearchUrl } from './utils/navigateToSearchUrl'; -import { getQuickFilterDetails } from './autoComplete/quickFilters/utils'; import ViewAllSearchItem from './ViewAllSearchItem'; import { ViewSelect } from '../entity/view/select/ViewSelect'; import { combineSiblingsInAutoComplete } from './utils/combineSiblingsInAutoComplete'; @@ -39,13 +38,14 @@ const StyledSearchBar = styled(Input)` &&& { border-radius: 70px; height: 40px; - font-size: 20px; - color: ${ANTD_GRAY[7]}; - background-color: ${ANTD_GRAY_V2[2]}; - } - > .ant-input { font-size: 14px; + color: ${ANTD_GRAY[7]}; background-color: ${ANTD_GRAY_V2[2]}; + border: 2px solid transparent; + + &:focus-within { + border: 1.5px solid ${REDESIGN_COLORS.BLUE}; + } } > .ant-input::placeholder { color: ${ANTD_GRAY_V2[10]}; @@ -203,23 +203,16 @@ export const SearchBar = ({ const { quickFilters, selectedQuickFilter, setSelectedQuickFilter } = useQuickFiltersContext(); const autoCompleteQueryOptions = useMemo(() => { - const query = suggestions.length ? effectiveQuery : ''; - const selectedQuickFilterLabel = - showQuickFilters && selectedQuickFilter - ? getQuickFilterDetails(selectedQuickFilter, entityRegistry).label - : ''; - const text = query || selectedQuickFilterLabel; - - if (!text) return []; + if (effectiveQuery === '') return []; return [ { - value: `${EXACT_SEARCH_PREFIX}${text}`, - label: , + value: `${EXACT_SEARCH_PREFIX}${effectiveQuery}`, + label: , type: EXACT_AUTOCOMPLETE_OPTION_TYPE, }, ]; - }, [showQuickFilters, suggestions.length, effectiveQuery, selectedQuickFilter, entityRegistry]); + }, [effectiveQuery]); const autoCompleteEntityOptions = useMemo(() => { return suggestions.map((suggestion: AutoCompleteResultForEntity) => { @@ -296,6 +289,22 @@ export const SearchBar = ({ } } + const searchInputRef = useRef(null); + + useEffect(() => { + const handleKeyDown = (event) => { + // Support command-k to select the search bar. 
+ // 75 is the keyCode for 'k' + if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) { + (searchInputRef?.current as any)?.focus(); + } + }; + document.addEventListener('keydown', handleKeyDown); + return () => { + document.removeEventListener('keydown', handleKeyDown); + }; + }, []); + return ( } + ref={searchInputRef} /> diff --git a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx index ced7d8642576b..ce1ad93565ba4 100644 --- a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx +++ b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx @@ -93,20 +93,6 @@ export function HeaderLinks(props: Props) { )} - {showIngestion && ( - - - - - - )} + {showIngestion && ( + + + + + + )} {showSettings && ( diff --git a/datahub-web-react/src/conf/theme/theme_dark.config.json b/datahub-web-react/src/conf/theme/theme_dark.config.json index 9746c3ddde5f3..54ebebd3b692b 100644 --- a/datahub-web-react/src/conf/theme/theme_dark.config.json +++ b/datahub-web-react/src/conf/theme/theme_dark.config.json @@ -30,7 +30,7 @@ "homepageMessage": "Find data you can count(*) on" }, "search": { - "searchbarMessage": "Search Datasets, People, & more..." + "searchbarMessage": "Search Tables, Dashboards, People, & more..." }, "menu": { "items": [ @@ -52,4 +52,4 @@ ] } } -} +} \ No newline at end of file diff --git a/datahub-web-react/src/conf/theme/theme_light.config.json b/datahub-web-react/src/conf/theme/theme_light.config.json index 906c04e38a1ba..6b9ef3eac52b0 100644 --- a/datahub-web-react/src/conf/theme/theme_light.config.json +++ b/datahub-web-react/src/conf/theme/theme_light.config.json @@ -33,7 +33,7 @@ "homepageMessage": "Find data you can count on" }, "search": { - "searchbarMessage": "Search Datasets, People, & more..." + "searchbarMessage": "Search Tables, Dashboards, People, & more..." 
}, "menu": { "items": [ @@ -60,4 +60,4 @@ ] } } -} +} \ No newline at end of file From 817c371fbf8f8287480a2150925e9526a28f1f6e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 4 Oct 2023 23:11:06 -0400 Subject: [PATCH 091/156] feat: data contracts models + CLI (#8923) Co-authored-by: Shirshanka Das Co-authored-by: John Joyce --- .../linkedin/datahub/graphql/TestUtils.java | 3 + .../test/resources/test-entity-registry.yaml | 8 + .../pet_of_the_week.dhub.dc.yaml | 21 ++ .../api/entities/datacontract/__init__.py | 0 .../datacontract/data_quality_assertion.py | 107 +++++++++ .../api/entities/datacontract/datacontract.py | 213 ++++++++++++++++++ .../datacontract/freshness_assertion.py | 86 +++++++ .../entities/datacontract/schema_assertion.py | 81 +++++++ .../datahub/cli/specific/datacontract_cli.py | 80 +++++++ .../src/datahub/cli/specific/file_loader.py | 26 +-- .../src/datahub/emitter/mce_builder.py | 24 +- .../src/datahub/emitter/mcp_builder.py | 27 +-- metadata-ingestion/src/datahub/entrypoints.py | 2 + .../src/datahub/ingestion/api/closeable.py | 8 +- .../ingestion/source/dbt/dbt_common.py | 28 ++- .../integrations/great_expectations/action.py | 19 +- .../tests/unit/test_mcp_builder.py | 3 +- .../linkedin/assertion/AssertionAction.pdl | 22 ++ .../linkedin/assertion/AssertionActions.pdl | 18 ++ .../com/linkedin/assertion/AssertionInfo.pdl | 49 +++- .../linkedin/assertion/AssertionResult.pdl | 18 +- .../assertion/AssertionResultError.pdl | 45 ++++ .../linkedin/assertion/AssertionRunEvent.pdl | 57 +++-- .../linkedin/assertion/AssertionSource.pdl | 27 +++ .../assertion/AssertionStdAggregation.pdl | 10 +- .../assertion/AssertionValueChangeType.pdl | 16 ++ .../com/linkedin/assertion/AuditLogSpec.pdl | 18 ++ .../assertion/DatasetAssertionInfo.pdl | 19 +- .../assertion/FixedIntervalSchedule.pdl | 10 + .../assertion/FreshnessAssertionInfo.pdl | 53 +++++ .../assertion/FreshnessAssertionSchedule.pdl | 66 ++++++ .../assertion/FreshnessCronSchedule.pdl | 25 ++ .../linkedin/assertion/FreshnessFieldKind.pdl | 17 ++ .../linkedin/assertion/FreshnessFieldSpec.pdl | 14 ++ .../IncrementingSegmentFieldTransformer.pdl | 60 +++++ .../IncrementingSegmentRowCountChange.pdl | 33 +++ .../IncrementingSegmentRowCountTotal.pdl | 27 +++ .../assertion/IncrementingSegmentSpec.pdl | 33 +++ .../com/linkedin/assertion/RowCountChange.pdl | 27 +++ .../com/linkedin/assertion/RowCountTotal.pdl | 22 ++ .../assertion/SchemaAssertionInfo.pdl | 29 +++ .../assertion/VolumeAssertionInfo.pdl | 82 +++++++ .../datacontract/DataContractProperties.pdl | 59 +++++ .../datacontract/DataContractStatus.pdl | 27 +++ .../datacontract/DataQualityContract.pdl | 16 ++ .../datacontract/FreshnessContract.pdl | 13 ++ .../linkedin/datacontract/SchemaContract.pdl | 13 ++ .../com/linkedin/dataset/DatasetFilter.pdl | 30 +++ .../linkedin/metadata/key/DataContractKey.pdl | 14 ++ .../com/linkedin/schema/SchemaFieldSpec.pdl | 21 ++ .../src/main/resources/entity-registry.yml | 9 + 51 files changed, 1641 insertions(+), 94 deletions(-) create mode 100644 metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py create mode 100644 
metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py create mode 100644 metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index 272a93fa1989c..606123cac926d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -8,6 +8,7 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.UrnUtils; +import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -21,6 +22,8 @@ public class TestUtils { public static EntityService getMockEntityService() { + PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); EntityRegistry registry = new ConfigEntityRegistry(TestUtils.class.getResourceAsStream("/test-entity-registry.yaml")); EntityService mockEntityService = Mockito.mock(EntityService.class); Mockito.when(mockEntityService.getEntityRegistry()).thenReturn(registry); diff --git a/datahub-graphql-core/src/test/resources/test-entity-registry.yaml b/datahub-graphql-core/src/test/resources/test-entity-registry.yaml index d694ae53ac42f..efd75a7fb07f5 100644 --- a/datahub-graphql-core/src/test/resources/test-entity-registry.yaml +++ b/datahub-graphql-core/src/test/resources/test-entity-registry.yaml @@ -181,6 +181,7 @@ entities: - assertionInfo - dataPlatformInstance - assertionRunEvent + - assertionActions - status - name: dataHubRetention category: internal @@ -292,4 +293,11 @@ entities: aspects: - ownershipTypeInfo - status +- name: dataContract + category: core + keyAspect: dataContractKey + aspects: + - dataContractProperties + - dataContractStatus + - status events: diff --git a/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml new file mode 100644 index 0000000000000..c73904403f678 --- /dev/null +++ b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml @@ -0,0 +1,21 @@ +# id: pet_details_dc # Optional: This is the unique identifier for the data contract +display_name: Data Contract for SampleHiveDataset +entity: urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD) +freshness: + time: 0700 + granularity: DAILY +schema: + properties: + field_foo: + type: string + native_type: VARCHAR(100) + field_bar: + type: boolean + required: + - field_bar +data_quality: + - type: column_range + config: + column: field_foo + min: 0 + max: 100 diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py b/metadata-ingestion/src/datahub/api/entities/datacontract/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py new file mode 100644 index 0000000000000..a665e95e93c43 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py @@ -0,0 +1,107 @@ +from typing import List, Optional, Union + +import pydantic +from typing_extensions import Literal + +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionStdAggregationClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, +) + + +class IdConfigMixin(ConfigModel): + id_raw: Optional[str] = pydantic.Field( + default=None, + alias="id", + 
description="The id of the assertion. If not provided, one will be generated using the type.", + ) + + def generate_default_id(self) -> str: + raise NotImplementedError + + +class CustomSQLAssertion(IdConfigMixin, ConfigModel): + type: Literal["custom_sql"] + + sql: str + + def generate_dataset_assertion_info( + self, entity_urn: str + ) -> DatasetAssertionInfoClass: + return DatasetAssertionInfoClass( + dataset=entity_urn, + scope=DatasetAssertionScopeClass.UNKNOWN, + fields=[], + operator=AssertionStdOperatorClass._NATIVE_, + aggregation=AssertionStdAggregationClass._NATIVE_, + logic=self.sql, + ) + + +class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): + type: Literal["unique"] + + # TODO: support multiple columns? + column: str + + def generate_default_id(self) -> str: + return f"{self.type}-{self.column}" + + def generate_dataset_assertion_info( + self, entity_urn: str + ) -> DatasetAssertionInfoClass: + return DatasetAssertionInfoClass( + dataset=entity_urn, + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + fields=[builder.make_schema_field_urn(entity_urn, self.column)], + operator=AssertionStdOperatorClass.EQUAL_TO, + aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, # purposely using the misspelled version to work with gql + parameters=AssertionStdParametersClass( + value=AssertionStdParameterClass( + value="1", type=AssertionStdParameterTypeClass.NUMBER + ) + ), + ) + + +class DataQualityAssertion(ConfigModel): + __root__: Union[ + CustomSQLAssertion, + ColumnUniqueAssertion, + ] = pydantic.Field(discriminator="type") + + @property + def id(self) -> str: + if self.__root__.id_raw: + return self.__root__.id_raw + try: + return self.__root__.generate_default_id() + except NotImplementedError: + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + dataset_assertion_info = self.__root__.generate_dataset_assertion_info( + entity_urn + ) + + return [ + MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=AssertionInfoClass( + type=AssertionTypeClass.DATASET, + datasetAssertion=dataset_assertion_info, + ), + ) + ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py new file mode 100644 index 0000000000000..2df446623a9d6 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -0,0 +1,213 @@ +import collections +from typing import Iterable, List, Optional, Tuple + +import pydantic +from ruamel.yaml import YAML +from typing_extensions import Literal + +import datahub.emitter.mce_builder as builder +from datahub.api.entities.datacontract.data_quality_assertion import ( + DataQualityAssertion, +) +from datahub.api.entities.datacontract.freshness_assertion import FreshnessAssertion +from datahub.api.entities.datacontract.schema_assertion import SchemaAssertion +from datahub.configuration.common import ConfigModel +from datahub.emitter.mce_builder import datahub_guid, make_assertion_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + DataContractPropertiesClass, + DataContractStateClass, + DataContractStatusClass, + DataQualityContractClass, + FreshnessContractClass, + SchemaContractClass, + StatusClass, +) +from datahub.utilities.urns.urn import guess_entity_type + + +class DataContract(ConfigModel): + """A yml representation of a Data Contract. 
+ + This model is used as a simpler, Python-native representation of a DataHub data contract. + It can be easily parsed from a YAML file, and can be easily converted into series of MCPs + that can be emitted to DataHub. + """ + + version: Literal[1] + + id: Optional[str] = pydantic.Field( + default=None, + alias="urn", + description="The data contract urn. If not provided, one will be generated.", + ) + entity: str = pydantic.Field( + description="The entity urn that the Data Contract is associated with" + ) + # TODO: add support for properties + # properties: Optional[Dict[str, str]] = None + + schema_field: Optional[SchemaAssertion] = pydantic.Field( + default=None, alias="schema" + ) + + freshness: Optional[FreshnessAssertion] = pydantic.Field(default=None) + + # TODO: Add a validator to ensure that ids are unique + data_quality: Optional[List[DataQualityAssertion]] = None + + _original_yaml_dict: Optional[dict] = None + + @pydantic.validator("data_quality") + def validate_data_quality( + cls, data_quality: Optional[List[DataQualityAssertion]] + ) -> Optional[List[DataQualityAssertion]]: + if data_quality: + # Raise an error if there are duplicate ids. + id_counts = collections.Counter(dq_check.id for dq_check in data_quality) + duplicates = [id for id, count in id_counts.items() if count > 1] + + if duplicates: + raise ValueError( + f"Got multiple data quality tests with the same type or ID: {duplicates}. Set a unique ID for each data quality test." + ) + + return data_quality + + @property + def urn(self) -> str: + if self.id: + assert guess_entity_type(self.id) == "dataContract" + return self.id + + # Data contract urns are stable + guid_obj = {"entity": self.entity} + urn = f"urn:li:dataContract:{datahub_guid(guid_obj)}" + return urn + + def _generate_freshness_assertion( + self, freshness: FreshnessAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "freshness": freshness.id, + } + assertion_urn = builder.make_assertion_urn(builder.datahub_guid(guid_dict)) + + return ( + assertion_urn, + freshness.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_schema_assertion( + self, schema_metadata: SchemaAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + # ingredients for guid -> the contract id, the fact that this is a schema assertion and the entity on which the assertion is made + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "schema": schema_metadata.id, + } + assertion_urn = make_assertion_urn(datahub_guid(guid_dict)) + + return ( + assertion_urn, + schema_metadata.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_data_quality_assertion( + self, data_quality: DataQualityAssertion + ) -> Tuple[str, List[MetadataChangeProposalWrapper]]: + guid_dict = { + "contract": self.urn, + "entity": self.entity, + "data_quality": data_quality.id, + } + assertion_urn = make_assertion_urn(datahub_guid(guid_dict)) + + return ( + assertion_urn, + data_quality.generate_mcp(assertion_urn, self.entity), + ) + + def _generate_dq_assertions( + self, data_quality_spec: List[DataQualityAssertion] + ) -> Tuple[List[str], List[MetadataChangeProposalWrapper]]: + assertion_urns = [] + assertion_mcps = [] + + for dq_check in data_quality_spec: + assertion_urn, assertion_mcp = self._generate_data_quality_assertion( + dq_check + ) + + assertion_urns.append(assertion_urn) + assertion_mcps.extend(assertion_mcp) + + return (assertion_urns, assertion_mcps) + + def generate_mcp( + 
self, + ) -> Iterable[MetadataChangeProposalWrapper]: + schema_assertion_urn = None + if self.schema_field is not None: + ( + schema_assertion_urn, + schema_assertion_mcps, + ) = self._generate_schema_assertion(self.schema_field) + yield from schema_assertion_mcps + + freshness_assertion_urn = None + if self.freshness: + ( + freshness_assertion_urn, + sla_assertion_mcps, + ) = self._generate_freshness_assertion(self.freshness) + yield from sla_assertion_mcps + + dq_assertions, dq_assertion_mcps = self._generate_dq_assertions( + self.data_quality or [] + ) + yield from dq_assertion_mcps + + # Now that we've generated the assertions, we can generate + # the actual data contract. + yield from MetadataChangeProposalWrapper.construct_many( + entityUrn=self.urn, + aspects=[ + DataContractPropertiesClass( + entity=self.entity, + schema=[SchemaContractClass(assertion=schema_assertion_urn)] + if schema_assertion_urn + else None, + freshness=[ + FreshnessContractClass(assertion=freshness_assertion_urn) + ] + if freshness_assertion_urn + else None, + dataQuality=[ + DataQualityContractClass(assertion=dq_assertion_urn) + for dq_assertion_urn in dq_assertions + ], + ), + # Also emit status. + StatusClass(removed=False), + # Emit the contract state as PENDING. + DataContractStatusClass(state=DataContractStateClass.PENDING) + if True + else None, + ], + ) + + @classmethod + def from_yaml( + cls, + file: str, + ) -> "DataContract": + with open(file) as fp: + yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) + orig_dictionary = yaml.load(fp) + parsed_data_contract = DataContract.parse_obj(orig_dictionary) + parsed_data_contract._original_yaml_dict = orig_dictionary + return parsed_data_contract diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py new file mode 100644 index 0000000000000..ee8fa1181e614 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from datetime import timedelta +from typing import List, Union + +import pydantic +from typing_extensions import Literal + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionTypeClass, + CalendarIntervalClass, + FixedIntervalScheduleClass, + FreshnessAssertionInfoClass, + FreshnessAssertionScheduleClass, + FreshnessAssertionScheduleTypeClass, + FreshnessAssertionTypeClass, + FreshnessCronScheduleClass, +) + + +class CronFreshnessAssertion(ConfigModel): + type: Literal["cron"] + + cron: str = pydantic.Field( + description="The cron expression to use. See https://crontab.guru/ for help." + ) + timezone: str = pydantic.Field( + "UTC", + description="The timezone to use for the cron schedule. 
Defaults to UTC.", + ) + + +class FixedIntervalFreshnessAssertion(ConfigModel): + type: Literal["interval"] + + interval: timedelta + + +class FreshnessAssertion(ConfigModel): + __root__: Union[ + CronFreshnessAssertion, FixedIntervalFreshnessAssertion + ] = pydantic.Field(discriminator="type") + + @property + def id(self): + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + freshness = self.__root__ + + if isinstance(freshness, CronFreshnessAssertion): + schedule = FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.CRON, + cron=FreshnessCronScheduleClass( + cron=freshness.cron, + timezone=freshness.timezone, + ), + ) + elif isinstance(freshness, FixedIntervalFreshnessAssertion): + schedule = FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, + fixedInterval=FixedIntervalScheduleClass( + unit=CalendarIntervalClass.SECOND, + multiple=int(freshness.interval.total_seconds()), + ), + ) + else: + raise ValueError(f"Unknown freshness type {freshness}") + + assertionInfo = AssertionInfoClass( + type=AssertionTypeClass.FRESHNESS, + freshnessAssertion=FreshnessAssertionInfoClass( + entity=entity_urn, + type=FreshnessAssertionTypeClass.DATASET_CHANGE, + schedule=schedule, + ), + ) + + return [ + MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) + ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py new file mode 100644 index 0000000000000..b5b592e01f58f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import json +from typing import List, Union + +import pydantic +from typing_extensions import Literal + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.extractor.json_schema_util import get_schema_metadata +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionTypeClass, + SchemaAssertionInfoClass, + SchemaFieldClass, + SchemalessClass, + SchemaMetadataClass, +) + + +class JsonSchemaContract(ConfigModel): + type: Literal["json-schema"] + + json_schema: dict = pydantic.Field(alias="json-schema") + + _schema_metadata: SchemaMetadataClass + + def _init_private_attributes(self) -> None: + super()._init_private_attributes() + self._schema_metadata = get_schema_metadata( + platform="urn:li:dataPlatform:datahub", + name="", + json_schema=self.json_schema, + raw_schema_string=json.dumps(self.json_schema), + ) + + +class FieldListSchemaContract(ConfigModel, arbitrary_types_allowed=True): + type: Literal["field-list"] + + fields: List[SchemaFieldClass] + + _schema_metadata: SchemaMetadataClass + + def _init_private_attributes(self) -> None: + super()._init_private_attributes() + self._schema_metadata = SchemaMetadataClass( + schemaName="", + platform="urn:li:dataPlatform:datahub", + version=0, + hash="", + platformSchema=SchemalessClass(), + fields=self.fields, + ) + + +class SchemaAssertion(ConfigModel): + __root__: Union[JsonSchemaContract, FieldListSchemaContract] = pydantic.Field( + discriminator="type" + ) + + @property + def id(self): + return self.__root__.type + + def generate_mcp( + self, assertion_urn: str, entity_urn: str + ) -> List[MetadataChangeProposalWrapper]: + schema_metadata = 
self.__root__._schema_metadata + + assertionInfo = AssertionInfoClass( + type=AssertionTypeClass.DATA_SCHEMA, + schemaAssertion=SchemaAssertionInfoClass( + entity=entity_urn, schema=schema_metadata + ), + ) + + return [ + MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) + ] diff --git a/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py b/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py new file mode 100644 index 0000000000000..3745943c8c96a --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/specific/datacontract_cli.py @@ -0,0 +1,80 @@ +import logging +from typing import Optional + +import click +from click_default_group import DefaultGroup + +from datahub.api.entities.datacontract.datacontract import DataContract +from datahub.ingestion.graph.client import get_default_graph +from datahub.telemetry import telemetry +from datahub.upgrade import upgrade + +logger = logging.getLogger(__name__) + + +@click.group(cls=DefaultGroup, default="upsert") +def datacontract() -> None: + """A group of commands to interact with the DataContract entity in DataHub.""" + pass + + +@datacontract.command() +@click.option("-f", "--file", required=True, type=click.Path(exists=True)) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def upsert(file: str) -> None: + """Upsert (create or update) a Data Contract in DataHub.""" + + data_contract: DataContract = DataContract.from_yaml(file) + urn = data_contract.urn + + with get_default_graph() as graph: + if not graph.exists(data_contract.entity): + raise ValueError( + f"Cannot define a data contract for non-existent entity {data_contract.entity}" + ) + + try: + for mcp in data_contract.generate_mcp(): + graph.emit(mcp) + click.secho(f"Update succeeded for urn {urn}.", fg="green") + except Exception as e: + logger.exception(e) + click.secho( + f"Update failed for {urn}: {e}", + fg="red", + ) + + +@datacontract.command() +@click.option( + "--urn", required=False, type=str, help="The urn for the data contract to delete" +) +@click.option( + "-f", + "--file", + required=False, + type=click.Path(exists=True), + help="The file containing the data contract definition", +) +@click.option("--hard/--soft", required=False, is_flag=True, default=False) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None: + """Delete a Data Contract in DataHub. Defaults to a soft-delete. 
Use --hard to completely erase metadata.""" + + if not urn: + if not file: + raise click.UsageError( + "Must provide either an urn or a file to delete a data contract" + ) + + data_contract = DataContract.from_yaml(file) + urn = data_contract.urn + + with get_default_graph() as graph: + if not graph.exists(urn): + raise ValueError(f"Data Contract {urn} does not exist") + + graph.delete_entity(urn, hard=hard) + click.secho(f"Data Contract {urn} deleted") diff --git a/metadata-ingestion/src/datahub/cli/specific/file_loader.py b/metadata-ingestion/src/datahub/cli/specific/file_loader.py index 54f12e024d294..a9787343fdb91 100644 --- a/metadata-ingestion/src/datahub/cli/specific/file_loader.py +++ b/metadata-ingestion/src/datahub/cli/specific/file_loader.py @@ -1,9 +1,7 @@ -import io from pathlib import Path from typing import Union -from datahub.configuration.common import ConfigurationError -from datahub.configuration.yaml import YamlConfigurationMechanism +from datahub.configuration.config_loader import load_config_file def load_file(config_file: Path) -> Union[dict, list]: @@ -17,19 +15,11 @@ def load_file(config_file: Path) -> Union[dict, list]: evolve to becoming a standard function that all the specific. cli variants will use to load up the models from external files """ - if not isinstance(config_file, Path): - config_file = Path(config_file) - if not config_file.is_file(): - raise ConfigurationError(f"Cannot open config file {config_file}") - if config_file.suffix in {".yaml", ".yml"}: - config_mech: YamlConfigurationMechanism = YamlConfigurationMechanism() - else: - raise ConfigurationError( - f"Only .yaml and .yml are supported. Cannot process file type {config_file.suffix}" - ) - - raw_config_file = config_file.read_text() - config_fp = io.StringIO(raw_config_file) - raw_config = config_mech.load_config(config_fp) - return raw_config + res = load_config_file( + config_file, + squirrel_original_config=False, + resolve_env_vars=False, + allow_stdin=False, + ) + return res diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 0928818c7005c..64c9ec1bb5704 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -1,11 +1,11 @@ """Convenience functions for creating MCEs""" +import hashlib import json import logging import os import re import time from enum import Enum -from hashlib import md5 from typing import ( TYPE_CHECKING, Any, @@ -21,7 +21,6 @@ import typing_inspect from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION -from datahub.emitter.serialization_helper import pre_json_transform from datahub.metadata.schema_classes import ( AssertionKeyClass, AuditStampClass, @@ -159,11 +158,24 @@ def container_urn_to_key(guid: str) -> Optional[ContainerKeyClass]: return None +class _DatahubKeyJSONEncoder(json.JSONEncoder): + # overload method default + def default(self, obj: Any) -> Any: + if hasattr(obj, "guid"): + return obj.guid() + # Call the default method for other types + return json.JSONEncoder.default(self, obj) + + def datahub_guid(obj: dict) -> str: - obj_str = json.dumps( - pre_json_transform(obj), separators=(",", ":"), sort_keys=True - ).encode("utf-8") - return md5(obj_str).hexdigest() + json_key = json.dumps( + obj, + separators=(",", ":"), + sort_keys=True, + cls=_DatahubKeyJSONEncoder, + ) + md5_hash = hashlib.md5(json_key.encode("utf-8")) + return str(md5_hash.hexdigest()) def 
make_assertion_urn(assertion_id: str) -> str: diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 7419577b367aa..06f689dfd317b 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -1,11 +1,10 @@ -import hashlib -import json -from typing import Any, Dict, Iterable, List, Optional, TypeVar +from typing import Dict, Iterable, List, Optional, TypeVar from pydantic.fields import Field from pydantic.main import BaseModel from datahub.emitter.mce_builder import ( + datahub_guid, make_container_urn, make_data_platform_urn, make_dataplatform_instance_urn, @@ -33,24 +32,13 @@ ) -def _stable_guid_from_dict(d: dict) -> str: - json_key = json.dumps( - d, - separators=(",", ":"), - sort_keys=True, - cls=DatahubKeyJSONEncoder, - ) - md5_hash = hashlib.md5(json_key.encode("utf-8")) - return str(md5_hash.hexdigest()) - - class DatahubKey(BaseModel): def guid_dict(self) -> Dict[str, str]: return self.dict(by_alias=True, exclude_none=True) def guid(self) -> str: bag = self.guid_dict() - return _stable_guid_from_dict(bag) + return datahub_guid(bag) class ContainerKey(DatahubKey): @@ -137,15 +125,6 @@ def as_urn(self) -> str: ) -class DatahubKeyJSONEncoder(json.JSONEncoder): - # overload method default - def default(self, obj: Any) -> Any: - if hasattr(obj, "guid"): - return obj.guid() - # Call the default method for other types - return json.JSONEncoder.default(self, obj) - - KeyType = TypeVar("KeyType", bound=ContainerKey) diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 84615fd9a6148..5bfab3b841fa3 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -21,6 +21,7 @@ from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put +from datahub.cli.specific.datacontract_cli import datacontract from datahub.cli.specific.dataproduct_cli import dataproduct from datahub.cli.specific.group_cli import group from datahub.cli.specific.user_cli import user @@ -158,6 +159,7 @@ def init() -> None: datahub.add_command(user) datahub.add_command(group) datahub.add_command(dataproduct) +datahub.add_command(datacontract) try: from datahub.cli.lite_cli import lite diff --git a/metadata-ingestion/src/datahub/ingestion/api/closeable.py b/metadata-ingestion/src/datahub/ingestion/api/closeable.py index 523174b9978b3..80a5008ed6368 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/closeable.py +++ b/metadata-ingestion/src/datahub/ingestion/api/closeable.py @@ -1,7 +1,9 @@ from abc import abstractmethod from contextlib import AbstractContextManager from types import TracebackType -from typing import Optional, Type +from typing import Optional, Type, TypeVar + +_Self = TypeVar("_Self", bound="Closeable") class Closeable(AbstractContextManager): @@ -9,6 +11,10 @@ class Closeable(AbstractContextManager): def close(self) -> None: pass + def __enter__(self: _Self) -> _Self: + # This method is mainly required for type checking. 
+ return self + def __exit__( self, exc_type: Optional[Type[BaseException]], diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 3edeb695e9f21..f9b71892975b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -701,18 +701,22 @@ def create_test_entity_mcps( assertion_urn = mce_builder.make_assertion_urn( mce_builder.datahub_guid( { - "platform": DBT_PLATFORM, - "name": node.dbt_name, - "instance": self.config.platform_instance, - **( - # Ideally we'd include the env unconditionally. However, we started out - # not including env in the guid, so we need to maintain backwards compatibility - # with existing PROD assertions. - {"env": self.config.env} - if self.config.env != mce_builder.DEFAULT_ENV - and self.config.include_env_in_assertion_guid - else {} - ), + k: v + for k, v in { + "platform": DBT_PLATFORM, + "name": node.dbt_name, + "instance": self.config.platform_instance, + **( + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. + {"env": self.config.env} + if self.config.env != mce_builder.DEFAULT_ENV + and self.config.include_env_in_assertion_guid + else {} + ), + }.items() + if v is not None } ) ) diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index f116550328819..8b393a8f6f1c6 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -35,6 +35,7 @@ from datahub.cli.cli_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.emitter.serialization_helper import pre_json_transform from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( get_platform_from_sqlalchemy_uri, ) @@ -253,13 +254,15 @@ def get_assertions_with_results( # possibly for each validation run assertionUrn = builder.make_assertion_urn( builder.datahub_guid( - { - "platform": GE_PLATFORM_NAME, - "nativeType": expectation_type, - "nativeParameters": kwargs, - "dataset": assertion_datasets[0], - "fields": assertion_fields, - } + pre_json_transform( + { + "platform": GE_PLATFORM_NAME, + "nativeType": expectation_type, + "nativeParameters": kwargs, + "dataset": assertion_datasets[0], + "fields": assertion_fields, + } + ) ) ) logger.debug( @@ -638,7 +641,7 @@ def get_dataset_partitions(self, batch_identifier, data_asset): ].batch_request.runtime_parameters["query"] partitionSpec = PartitionSpecClass( type=PartitionTypeClass.QUERY, - partition=f"Query_{builder.datahub_guid(query)}", + partition=f"Query_{builder.datahub_guid(pre_json_transform(query))}", ) batchSpec = BatchSpec( diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/test_mcp_builder.py index 23f2bddc2084e..561b782ef9e46 100644 --- a/metadata-ingestion/tests/unit/test_mcp_builder.py +++ b/metadata-ingestion/tests/unit/test_mcp_builder.py @@ -1,5 +1,4 @@ import datahub.emitter.mcp_builder as builder -from datahub.emitter.mce_builder import datahub_guid def test_guid_generator(): @@ -80,7 +79,7 @@ def test_guid_generators(): key = 
builder.SchemaKey( database="test", schema="Test", platform="mysql", instance="TestInstance" ) - guid_datahub = datahub_guid(key.dict(by_alias=True)) + guid_datahub = key.guid() guid = key.guid() assert guid == guid_datahub diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl new file mode 100644 index 0000000000000..df6620b66bfd8 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionAction.pdl @@ -0,0 +1,22 @@ +namespace com.linkedin.assertion + +/** + * The Actions about an Assertion. + * In the future, we'll likely extend this model to support additional + * parameters or options related to the assertion actions. + */ +record AssertionAction { + /** + * The type of the Action + */ + type: enum AssertionActionType { + /** + * Raise an incident. + */ + RAISE_INCIDENT + /** + * Resolve open incidents related to the assertion. + */ + RESOLVE_INCIDENT + } +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl new file mode 100644 index 0000000000000..61846c1ba9c12 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionActions.pdl @@ -0,0 +1,18 @@ +namespace com.linkedin.assertion + +/** + * The Actions about an Assertion + */ +@Aspect = { + "name": "assertionActions" +} +record AssertionActions { + /** + * Actions to be executed on successful assertion run. + */ + onSuccess: array[AssertionAction] = [] + /** + * Actions to be executed on failed assertion run. + */ + onFailure: array[AssertionAction] = [] +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index 77ee147a781e2..ae2a58028057b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -13,13 +13,58 @@ record AssertionInfo includes CustomProperties, ExternalReference { /** * Type of assertion. Assertion types can evolve to span Datasets, Flows (Pipelines), Models, Features etc. */ + @Searchable = { } type: enum AssertionType { - // A single-dataset assertion. When this is the value, the datasetAssertion field will be populated. + /** + * A single-dataset assertion. When this is the value, the datasetAssertion field will be populated. + */ DATASET + + /** + * A freshness assertion, or an assertion which indicates when a particular operation should occur + * to an asset. + */ + FRESHNESS + + /** + * A volume assertion, or an assertion which indicates how much data should be available for a + * particular asset. + */ + VOLUME + + /** + * A schema or structural assertion. + * + * Would have named this SCHEMA but the codegen for PDL does not allow this (reserved word). + */ + DATA_SCHEMA } /** - * Dataset Assertion information when type is DATASET + * A Dataset Assertion definition. This field is populated when the type is DATASET. */ datasetAssertion: optional DatasetAssertionInfo + + /** + * An Freshness Assertion definition. This field is populated when the type is FRESHNESS. + */ + freshnessAssertion: optional FreshnessAssertionInfo + + /** + * An Volume Assertion definition. This field is populated when the type is VOLUME. 
+ */ + volumeAssertion: optional VolumeAssertionInfo + + /** + * An schema Assertion definition. This field is populated when the type is DATASET_SCHEMA + */ + schemaAssertion: optional SchemaAssertionInfo + + /** + * The source or origin of the Assertion definition. + * + * If the source type of the Assertion is EXTERNAL, it is expected to have a corresponding dataPlatformInstance aspect detailing + * the platform where it was ingested from. + */ + source: optional AssertionSource } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl index decbfc08263de..ded84e1969153 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl @@ -5,10 +5,15 @@ namespace com.linkedin.assertion */ record AssertionResult { /** - * The final result, e.g. either SUCCESS or FAILURE. + * The final result, e.g. either SUCCESS, FAILURE, or ERROR. */ @TimeseriesField = {} + @Searchable = {} type: enum AssertionResultType { + /** + * The Assertion has not yet been fully evaluated + */ + INIT /** * The Assertion Succeeded */ @@ -17,6 +22,10 @@ record AssertionResult { * The Assertion Failed */ FAILURE + /** + * The Assertion encountered an Error + */ + ERROR } /** @@ -45,8 +54,13 @@ record AssertionResult { nativeResults: optional map[string, string] /** - * URL where full results are available + * External URL where full results are available. Only present when assertion source is not native. */ externalUrl: optional string + /** + * The error object if AssertionResultType is an Error + */ + error: optional AssertionResultError + } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl new file mode 100644 index 0000000000000..e768fe8521942 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl @@ -0,0 +1,45 @@ +namespace com.linkedin.assertion + +/** + * An error encountered when evaluating an AssertionResult + */ +record AssertionResultError { + /** + * The type of error encountered + */ + type: enum AssertionResultErrorType { + /** + * Source is unreachable + */ + SOURCE_CONNECTION_ERROR + /** + * Source query failed to execute + */ + SOURCE_QUERY_FAILED + /** + * Insufficient data to evaluate the assertion + */ + INSUFFICIENT_DATA + /** + * Invalid parameters were detected + */ + INVALID_PARAMETERS + /** + * Event type not supported by the specified source + */ + INVALID_SOURCE_TYPE + /** + * Unsupported platform + */ + UNSUPPORTED_PLATFORM + /** + * Unknown error + */ + UNKNOWN_ERROR + } + + /** + * Additional metadata depending on the type of error + */ + properties: optional map[string, string] +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl index 9e75f96fafd06..14f1204232740 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl @@ -1,6 +1,7 @@ namespace com.linkedin.assertion -import com.linkedin.timeseries.TimeseriesAspectBase +import com.linkedin.timeseries.PartitionSpec +import 
com.linkedin.timeseries.TimeWindowSize import com.linkedin.common.ExternalReference import com.linkedin.common.Urn @@ -12,36 +13,31 @@ import com.linkedin.common.Urn "name": "assertionRunEvent", "type": "timeseries", } -record AssertionRunEvent includes TimeseriesAspectBase { +record AssertionRunEvent { + + /** + * The event timestamp field as epoch at UTC in milli seconds. + */ + @Searchable = { + "fieldName": "lastCompletedTime", + "fieldType": "DATETIME" + } + timestampMillis: long /** * Native (platform-specific) identifier for this run */ - //Multiple assertions could occur in same evaluator run runId: string - /* - * Urn of assertion which is evaluated - */ - @TimeseriesField = {} - assertionUrn: Urn - /* * Urn of entity on which the assertion is applicable */ - //example - dataset urn, if dataset is being asserted @TimeseriesField = {} asserteeUrn: Urn - - /** - * Specification of the batch which this run is evaluating - */ - batchSpec: optional BatchSpec /** * The status of the assertion run as per this timeseries event. */ - // Currently just supports COMPLETE, but should evolve to support other statuses like STARTED, RUNNING, etc. @TimeseriesField = {} status: enum AssertionRunStatus { /** @@ -59,4 +55,33 @@ record AssertionRunEvent includes TimeseriesAspectBase { * Runtime parameters of evaluation */ runtimeContext: optional map[string, string] + + /** + * Specification of the batch which this run is evaluating + */ + batchSpec: optional BatchSpec + + /* + * Urn of assertion which is evaluated + */ + @TimeseriesField = {} + assertionUrn: Urn + + /** + * Granularity of the event if applicable + */ + eventGranularity: optional TimeWindowSize + + /** + * The optional partition specification. + */ + partitionSpec: optional PartitionSpec = { + "type":"FULL_TABLE", + "partition":"FULL_TABLE_SNAPSHOT" + } + + /** + * The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value. + */ + messageId: optional string } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl new file mode 100644 index 0000000000000..d8892c0c71c6f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** + * The source of an assertion + */ +record AssertionSource { + /** + * The type of the Assertion Source + */ + @Searchable = { + "fieldName": "sourceType" + } + type: enum AssertionSourceType { + /** + * The assertion was defined natively on DataHub by a user. + */ + NATIVE + /** + * The assertion was defined and managed externally of DataHub. + */ + EXTERNAL + /** + * The assertion was inferred, e.g. from offline AI / ML models. + */ + INFERRED + } +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl index b79b96f9379b0..968944165a1c8 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdAggregation.pdl @@ -4,6 +4,7 @@ namespace com.linkedin.assertion * The function that is applied to the aggregation input (schema, rows, column values) before evaluating an operator. */ enum AssertionStdAggregation { + /** * Assertion is applied on number of rows. 
*/ @@ -20,7 +21,7 @@ enum AssertionStdAggregation { COLUMN_COUNT /** - * Assertion is applied on individual column value. + * Assertion is applied on individual column value. (No aggregation) */ IDENTITY @@ -42,6 +43,13 @@ enum AssertionStdAggregation { /** * Assertion is applied on proportion of distinct values in column */ + UNIQUE_PROPORTION + + /** + * Assertion is applied on proportion of distinct values in column + * + * Deprecated! Use UNIQUE_PROPORTION instead. + */ UNIQUE_PROPOTION /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl new file mode 100644 index 0000000000000..5a1ff4fa73ffb --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionValueChangeType.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.assertion + +/** +* An enum to represent a type of change in an assertion value, metric, or measurement. +*/ +enum AssertionValueChangeType { + /** + * A change that is defined in absolute terms. + */ + ABSOLUTE + /** + * A change that is defined in relative terms using percentage change + * from the original value. + */ + PERCENTAGE +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl new file mode 100644 index 0000000000000..4d5bf261cbf89 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AuditLogSpec.pdl @@ -0,0 +1,18 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldDataType + +/** +* Information about the Audit Log operation to use in evaluating an assertion. +**/ +record AuditLogSpec { + /** + * The list of operation types that should be monitored. If not provided, a default set will be used. + */ + operationTypes: optional array [string] + + /** + * Optional: The user name associated with the operation. + */ + userName: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl index c411c7ff8a572..2a8bf28f1ff11 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/DatasetAssertionInfo.pdl @@ -18,9 +18,10 @@ record DatasetAssertionInfo { /** * Scope of the Assertion. What part of the dataset does this assertion apply to? **/ + @Searchable = {} scope: enum DatasetAssertionScope { /** - * This assertion applies to dataset columns + * This assertion applies to dataset column(s) */ DATASET_COLUMN @@ -29,6 +30,11 @@ record DatasetAssertionInfo { */ DATASET_ROWS + /** + * This assertion applies to the storage size of the dataset + */ + DATASET_STORAGE_SIZE + /** * This assertion applies to the schema of the dataset */ @@ -41,7 +47,9 @@ record DatasetAssertionInfo { } /** - * One or more dataset schema fields that are targeted by this assertion + * One or more dataset schema fields that are targeted by this assertion. + * + * This field is expected to be provided if the assertion scope is DATASET_COLUMN. 
*/ @Relationship = { "/*": { @@ -49,11 +57,18 @@ record DatasetAssertionInfo { "entityTypes": [ "schemaField" ] } } + @Searchable = { + "/*": { + "fieldType": "URN" + } + } fields: optional array[Urn] /** * Standardized assertion operator + * This field is left blank if there is no selected aggregation or metric for a particular column. */ + @Searchable = {} aggregation: optional AssertionStdAggregation /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl new file mode 100644 index 0000000000000..c08c33ffb92d3 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FixedIntervalSchedule.pdl @@ -0,0 +1,10 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.timeseries.TimeWindowSize + +/** +* Attributes defining a relative fixed interval SLA schedule. +*/ +record FixedIntervalSchedule includes TimeWindowSize { +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl new file mode 100644 index 0000000000000..4445a11ff40a7 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionInfo.pdl @@ -0,0 +1,53 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a Freshness Assertion. +**/ +record FreshnessAssertionInfo { + /** + * The type of the freshness assertion being monitored. + */ + @Searchable = {} + type: enum FreshnessAssertionType { + /** + * An Freshness based on Operations performed on a particular Dataset (insert, update, delete, etc) and sourced from an audit log, as + * opposed to based on the highest watermark in a timestamp column (e.g. a query). Only valid when entity is of type "dataset". + */ + DATASET_CHANGE + /** + * An Freshness based on a successful execution of a Data Job. + */ + DATA_JOB_RUN + } + + /** + * The entity targeted by this Freshness check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset", "dataJob" ] + } + entity: Urn + + /** + * Produce FAILURE Assertion Result if the asset is not updated on the cadence and within the time range described by the schedule. + */ + @Searchable = { + "/type": { + "fieldName": "scheduleType" + } + } + schedule: FreshnessAssertionSchedule + + /** + * A definition of the specific filters that should be applied, when performing monitoring. + * If not provided, there is no filter, and the full table is under consideration. + */ + filter: optional DatasetFilter +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl new file mode 100644 index 0000000000000..a87342ad4f5ed --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessAssertionSchedule.pdl @@ -0,0 +1,66 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn + +/** +* Attributes defining a single Freshness schedule. +*/ +record FreshnessAssertionSchedule { + + /** + * The type of a Freshness Assertion Schedule. + * + * Once we support data-time-relative schedules (e.g. 
schedules relative to time partitions), + * we will add those schedule types here. + */ + type: enum FreshnessAssertionScheduleType { + /** + * An highly configurable recurring schedule which describes the times of events described + * by a CRON schedule, with the evaluation schedule assuming to be matching the cron schedule. + * + * In a CRON schedule type, we compute the look-back window to be the time between the last scheduled event + * and the current event (evaluation time). This means that the evaluation schedule must match exactly + * the schedule defined inside the cron schedule. + * + * For example, a CRON schedule defined as "0 8 * * *" would represent a schedule of "every day by 8am". Assuming + * that the assertion evaluation schedule is defined to match this, the freshness assertion would be evaluated in the following way: + * + * 1. Compute the "last scheduled occurrence" of the event using the CRON schedule. For example, yesterday at 8am. + * 2. Compute the bounds of a time window between the "last scheduled occurrence" (yesterday at 8am) until the "current occurrence" (today at 8am) + * 3. Verify that the target event has occurred within the CRON-interval window. + * 4. If the target event has occurred within the time window, then assertion passes. + * 5. If the target event has not occurred within the time window, then the assertion fails. + * + */ + CRON + /** + * A fixed interval which is used to compute a look-back window for use when evaluating the assertion relative + * to the Evaluation Time of the Assertion. + * + * To compute the valid look-back window, we subtract the fixed interval from the evaluation time. Then, we verify + * that the target event has occurred within that window. + * + * For example, a fixed interval of "24h" would represent a schedule of "in the last 24 hours". + * The 24 hour interval is relative to the evaluation time of the assertion. For example if we schedule the assertion + * to be evaluated each hour, we'd compute the result as follows: + * + * 1. Subtract the fixed interval from the current time (Evaluation time) to compute the bounds of a fixed look-back window. + * 2. Verify that the target event has occurred within the CRON-interval window. + * 3. If the target event has occurred within the time window, then assertion passes. + * 4. If the target event has not occurred within the time window, then the assertion fails. + * + */ + FIXED_INTERVAL + } + + /** + * A cron schedule. This field is required when type is CRON. + */ + cron: optional FreshnessCronSchedule + + /** + * A fixed interval schedule. This field is required when type is FIXED_INTERVAL. + */ + fixedInterval: optional FixedIntervalSchedule + +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl new file mode 100644 index 0000000000000..d48900690c51d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessCronSchedule.pdl @@ -0,0 +1,25 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining a CRON-formatted schedule used for defining a freshness assertion. +*/ +record FreshnessCronSchedule { + /** + * A cron-formatted execution interval, as a cron string, e.g. 1 * * * * + */ + cron: string + + /** + * Timezone in which the cron interval applies, e.g. 
America/Los_Angeles
+   */
+  timezone: string
+
+  /**
+   * An optional offset in milliseconds to SUBTRACT from the timestamp generated by the cron schedule
+   * to generate the lower bounds of the "freshness window", or the window of time in which an event must have occurred in order for the Freshness check
+   * to be considered passing.
+   *
+   * If left empty, the start of the SLA window will be the _end_ of the previously evaluated Freshness window.
+   */
+  windowStartOffsetMs: optional long
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl
new file mode 100644
index 0000000000000..7b25589e500da
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldKind.pdl
@@ -0,0 +1,17 @@
+namespace com.linkedin.assertion
+
+enum FreshnessFieldKind {
+  /**
+   * Determine that a change has occurred by inspecting a last modified field which
+   * represents the last time at which a row was changed.
+   */
+  LAST_MODIFIED,
+  /**
+   * Determine that a change has occurred by inspecting a field which should be tracked as the
+   * "high watermark" for the table. This should be an ascending number or date field.
+   *
+   * If rows with this column have not been added since the previous check
+   * then the Freshness Assertion will fail.
+   */
+  HIGH_WATERMARK
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl
new file mode 100644
index 0000000000000..04acd1c71352d
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl
@@ -0,0 +1,14 @@
+namespace com.linkedin.assertion
+
+import com.linkedin.schema.SchemaFieldSpec
+
+
+/**
+* Lightweight spec used for referencing a particular schema field.
+**/
+record FreshnessFieldSpec includes SchemaFieldSpec {
+  /**
+   * The type of the field being used to verify the Freshness Assertion.
+   */
+  kind: optional FreshnessFieldKind
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl
new file mode 100644
index 0000000000000..d1d3e7b23b666
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentFieldTransformer.pdl
@@ -0,0 +1,60 @@
+namespace com.linkedin.assertion
+
+/**
+* The definition of the transformer function that should be applied to a given field / column value in a dataset
+* in order to determine the segment or bucket that it belongs to, which in turn is used to evaluate
+* volume assertions.
+*/
+record IncrementingSegmentFieldTransformer {
+  /**
+   * A 'standard' transformer type. Note that not all source systems will support all operators.
+   */
+  type: enum IncrementingSegmentFieldTransformerType {
+    /**
+     * Rounds a timestamp (in milliseconds) down to the nearest minute.
+     */
+    TIMESTAMP_MS_TO_MINUTE
+
+    /**
+     * Rounds a timestamp (in milliseconds) down to the nearest hour.
+     */
+    TIMESTAMP_MS_TO_HOUR
+
+    /**
+     * Rounds a timestamp (in milliseconds) down to the start of the day.
+ */ + TIMESTAMP_MS_TO_DATE + + /** + * Rounds a timestamp (in milliseconds) down to the start of the month + */ + TIMESTAMP_MS_TO_MONTH + + /** + * Rounds a timestamp (in milliseconds) down to the start of the year + */ + TIMESTAMP_MS_TO_YEAR + + /** + * Rounds a numeric value down to the nearest integer. + */ + FLOOR + + /** + * Rounds a numeric value up to the nearest integer. + */ + CEILING + + /** + * A backdoor to provide a native operator type specific to a given source system like + * Snowflake, Redshift, BQ, etc. + */ + NATIVE + } + + /** + * The 'native' transformer type, useful as a back door if a custom operator is required. + * This field is required if the type is NATIVE. + */ + nativeType: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl new file mode 100644 index 0000000000000..7c4c73f2ea887 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountChange.pdl @@ -0,0 +1,33 @@ +namespace com.linkedin.assertion + + +/** +* Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_CHANGE volume assertion. +*/ +record IncrementingSegmentRowCountChange { + /** + * A specification of how the 'segment' can be derived using a column and an optional transformer function. + */ + segment: IncrementingSegmentSpec + + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + */ + type: AssertionValueChangeType + + /** + * The operator you'd like to apply to the row count value + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl new file mode 100644 index 0000000000000..6b035107aae09 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentRowCountTotal.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL volume assertion. +*/ +record IncrementingSegmentRowCountTotal { + /** + * A specification of how the 'segment' can be derived using a column and an optional transformer function. + */ + segment: IncrementingSegmentSpec + + /** + * The operator you'd like to apply. + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. 
+ */ + parameters: AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl new file mode 100644 index 0000000000000..eddd0c3da3df7 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/IncrementingSegmentSpec.pdl @@ -0,0 +1,33 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldSpec + +/** +* Core attributes required to identify an incrementing segment in a table. This type is mainly useful +* for tables that constantly increase with new rows being added on a particular cadence (e.g. fact or event tables) +* +* An incrementing segment represents a logical chunk of data which is INSERTED +* into a dataset on a regular interval, along with the presence of a constantly-incrementing column +* value such as an event time, date partition, or last modified column. +* +* An incrementing segment is principally identified by 2 key attributes combined: +* +* 1. A field or column that represents the incrementing value. New rows that are inserted will be identified using this column. +* Note that the value of this column may not by itself represent the "bucket" or the "segment" in which the row falls. +* +* 2. [Optional] An transformer function that may be applied to the selected column value in order +* to obtain the final "segment identifier" or "bucket identifier". Rows that have the same value after applying the transformation +* will be grouped into the same segment, using which the final value (e.g. row count) will be determined. +*/ +record IncrementingSegmentSpec { + /** + * The field to use to generate segments. It must be constantly incrementing as new rows are inserted. + */ + field: SchemaFieldSpec + + /** + * Optional transformer function to apply to the field in order to obtain the final segment or bucket identifier. + * If not provided, then no operator will be applied to the field. (identity function) + */ + transformer: optional IncrementingSegmentFieldTransformer +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl new file mode 100644 index 0000000000000..85a915066f584 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountChange.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.assertion + +/** +* Attributes defining a ROW_COUNT_CHANGE volume assertion. +*/ +record RowCountChange { + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + */ + type: AssertionValueChangeType + + /** + * The operator you'd like to apply. + * + * Note that only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. 
+   */
+  parameters: AssertionStdParameters
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl
new file mode 100644
index 0000000000000..f691f15f62e04
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/RowCountTotal.pdl
@@ -0,0 +1,22 @@
+namespace com.linkedin.assertion
+
+/**
+* Attributes defining a ROW_COUNT_TOTAL volume assertion.
+*/
+record RowCountTotal {
+  /**
+   * The operator you'd like to apply.
+   *
+   * Note that only numeric operators are valid inputs:
+   * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
+   * BETWEEN.
+   */
+  operator: AssertionStdOperator
+
+  /**
+   * The parameters you'd like to provide as input to the operator.
+   *
+   * Note that only numeric parameter types are valid inputs: NUMBER.
+   */
+  parameters: AssertionStdParameters
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl
new file mode 100644
index 0000000000000..fd246e0c7cfc4
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl
@@ -0,0 +1,29 @@
+namespace com.linkedin.assertion
+
+import com.linkedin.common.Urn
+import com.linkedin.schema.SchemaMetadata
+
+/**
+* Attributes that are applicable to schema assertions
+**/
+record SchemaAssertionInfo {
+  /**
+   * The entity targeted by the assertion
+   */
+  @Searchable = {
+    "fieldType": "URN"
+  }
+  @Relationship = {
+    "name": "Asserts",
+    "entityTypes": [ "dataset", "dataJob" ]
+  }
+  entity: Urn
+
+  /**
+   * A definition of the expected structure for the asset
+   *
+   * Note that many of the fields of this model, especially those related to metadata (tags, terms)
+   * will go unused in this context.
+   */
+  schema: SchemaMetadata
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl
new file mode 100644
index 0000000000000..327b76f95762e
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl
@@ -0,0 +1,82 @@
+namespace com.linkedin.assertion
+
+import com.linkedin.common.Urn
+import com.linkedin.dataset.DatasetFilter
+
+/**
+* Attributes defining a dataset Volume Assertion
+*/
+record VolumeAssertionInfo {
+  /**
+   * The type of the volume assertion being monitored.
+   */
+  @Searchable = {}
+  type: enum VolumeAssertionType {
+    /**
+     * A volume assertion that is evaluated against the total row count of a dataset.
+     */
+    ROW_COUNT_TOTAL
+    /**
+     * A volume assertion that is evaluated against an incremental row count of a dataset,
+     * or a row count change.
+     */
+    ROW_COUNT_CHANGE
+    /**
+     * A volume assertion that checks the latest "segment" in a table based on an incrementing
+     * column to check whether its row count falls into a particular range.
+     *
+     * This can be used to monitor the row count of an incrementing date-partition column segment.
+     */
+    INCREMENTING_SEGMENT_ROW_COUNT_TOTAL
+    /**
+     * A volume assertion that compares the row counts in neighboring "segments" or "partitions"
+     * of an incrementing column.
+     * This can be used to track changes between subsequent date partitions
+     * in a table, for example.
+ */ + INCREMENTING_SEGMENT_ROW_COUNT_CHANGE + } + + /** + * The entity targeted by this Volume check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * Produce FAILURE Assertion Result if the row count of the asset does not meet specific requirements. + * Required if type is 'ROW_COUNT_TOTAL' + */ + rowCountTotal: optional RowCountTotal + + /** + * Produce FAILURE Assertion Result if the delta row count of the asset does not meet specific requirements + * within a given period of time. + * Required if type is 'ROW_COUNT_CHANGE' + */ + rowCountChange: optional RowCountChange + + /** + * Produce FAILURE Assertion Result if the asset's latest incrementing segment row count total + * does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_TOTAL' + */ + incrementingSegmentRowCountTotal: optional IncrementingSegmentRowCountTotal + + /** + * Produce FAILURE Assertion Result if the asset's incrementing segment row count delta + * does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_CHANGE' + */ + incrementingSegmentRowCountChange: optional IncrementingSegmentRowCountChange + + /** + * A definition of the specific filters that should be applied, when performing monitoring. + * If not provided, there is no filter, and the full table is under consideration. + */ + filter: optional DatasetFilter +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl new file mode 100644 index 0000000000000..a623f585df30c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractProperties.pdl @@ -0,0 +1,59 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * Information about a data contract + */ +@Aspect = { + "name": "dataContractProperties" +} +record DataContractProperties { + /** + * The entity that this contract is associated with. Currently, we only support Dataset contracts, but + * in the future we may also support Data Product level contracts. + */ + @Relationship = { + "name": "ContractFor", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * An optional set of schema contracts. If this is a dataset contract, there will only be one. + */ + @Relationship = { + "/*/assertion": { + "name": "IncludesSchemaAssertion", + "entityTypes": [ "assertion" ] + } + } + schema: optional array[SchemaContract] + + /** + * An optional set of FRESHNESS contracts. If this is a dataset contract, there will only be one. + */ + @Relationship = { + "/*/assertion": { + "name": "IncludesFreshnessAssertion", + "entityTypes": [ "assertion" ] + } + } + freshness: optional array[FreshnessContract] + + /** + * An optional set of Data Quality contracts, e.g. table and column level contract constraints. 
+ */ + @Relationship = { + "/*/assertion": { + "name": "IncludesDataQualityAssertion", + "entityTypes": [ "assertion" ] + } + } + dataQuality: optional array[DataQualityContract] + + /** + * YAML-formatted contract definition + */ + rawContract: optional string +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl new file mode 100644 index 0000000000000..d61fb191ae53d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataContractStatus.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn +import com.linkedin.common.CustomProperties + +/** + * Information about the status of a data contract + */ +@Aspect = { + "name": "dataContractStatus" +} +record DataContractStatus includes CustomProperties { + /** + * The latest state of the data contract + */ + @Searchable = {} + state: enum DataContractState { + /** + * The data contract is active. + */ + ACTIVE + /** + * The data contract is pending implementation. + */ + PENDING + } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl new file mode 100644 index 0000000000000..273d2c2a56f95 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl @@ -0,0 +1,16 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + + +/** + * A data quality contract pertaining to a physical data asset + * Data Quality contracts are used to make assertions about data quality metrics for a physical data asset + */ +record DataQualityContract { + /** + * The assertion representing the Data Quality contract. + * E.g. a table or column-level assertion. + */ + assertion: Urn +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl new file mode 100644 index 0000000000000..8cfa66846d505 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/FreshnessContract.pdl @@ -0,0 +1,13 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * A contract pertaining to the operational SLAs of a physical data asset + */ +record FreshnessContract { + /** + * The assertion representing the SLA contract. + */ + assertion: Urn +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl new file mode 100644 index 0000000000000..6c11e0da5b128 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl @@ -0,0 +1,13 @@ +namespace com.linkedin.datacontract + +import com.linkedin.common.Urn + +/** + * Expectations for a logical schema + */ +record SchemaContract { + /** + * The assertion representing the schema contract. 
+   */
+  assertion: Urn
+}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl
new file mode 100644
index 0000000000000..6823398f79f3d
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetFilter.pdl
@@ -0,0 +1,30 @@
+namespace com.linkedin.dataset
+
+/**
+ * A definition of filters that should be used when
+ * querying an external Dataset or Table.
+ *
+ * Note that this model should NOT be used for working with
+ * search / filter on DataHub Platform itself.
+ */
+record DatasetFilter {
+  /**
+   * How the partition will be represented in this model.
+   *
+   * In the future, we'll likely add support for more structured
+   * predicates.
+   */
+  type: enum DatasetFilterType {
+    /**
+     * The partition is represented as an opaque, raw SQL
+     * clause.
+     */
+    SQL
+  }
+
+  /**
+   * The raw where clause string which will be used for monitoring.
+   * Required if the type is SQL.
+   */
+  sql: optional string
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl
new file mode 100644
index 0000000000000..f1d4a709cd6bf
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataContractKey.pdl
@@ -0,0 +1,14 @@
+namespace com.linkedin.metadata.key
+
+/**
+ * Key for a Data Contract
+ */
+@Aspect = {
+  "name": "dataContractKey"
+}
+record DataContractKey {
+  /**
+   * Unique id for the contract
+   */
+  id: string
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl
new file mode 100644
index 0000000000000..e875ff7a84403
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaFieldSpec.pdl
@@ -0,0 +1,21 @@
+namespace com.linkedin.schema
+
+/**
+* Lightweight spec used for referencing a particular schema field.
+**/
+record SchemaFieldSpec {
+  /**
+   * The field path
+   */
+  path: string
+
+  /**
+   * The DataHub standard schema field type.
+ */ + type: string + + /** + * The native field type + */ + nativeType: string +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 56fc5f6568eb7..11d0f74305d7b 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -262,6 +262,7 @@ entities: - assertionInfo - dataPlatformInstance - assertionRunEvent + - assertionActions - status - name: dataHubRetention category: internal @@ -457,4 +458,12 @@ entities: aspects: - ownershipTypeInfo - status + - name: dataContract + category: core + keyAspect: dataContractKey + aspects: + - dataContractProperties + - dataContractStatus + - status + events: From 2bc685d3b98f879d1c3051a8484a78489359d910 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 5 Oct 2023 09:31:32 +0530 Subject: [PATCH 092/156] ci: tweak ci to decrease wait time of devs (#8945) --- .github/workflows/build-and-test.yml | 14 ++++++++++---- .github/workflows/metadata-ingestion.yml | 7 ++++--- .../integration/powerbi/test_admin_only_api.py | 3 +++ .../tests/integration/powerbi/test_m_parser.py | 2 +- .../tests/integration/powerbi/test_powerbi.py | 2 +- .../tests/integration/snowflake/test_snowflake.py | 4 ++-- .../integration/tableau/test_tableau_ingest.py | 2 +- .../tests/integration/trino/test_trino.py | 5 ++--- 8 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 3f409878b191f..96b9bb2a14933 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -27,8 +27,8 @@ jobs: command: [ # metadata-ingestion and airflow-plugin each have dedicated build jobs - "./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel", - "./gradlew :datahub-frontend:build :datahub-web-react:build --parallel", + "except_metadata_ingestion", + "frontend" ] timezone: [ @@ -53,9 +53,15 @@ jobs: with: python-version: "3.10" cache: pip - - name: Gradle build (and test) + - name: Gradle build (and test) for metadata ingestion + # we only need the timezone runs for frontend tests + if: ${{ matrix.command == 'except_metadata_ingestion' && matrix.timezone == 'America/New_York' }} run: | - ${{ matrix.command }} + ./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel + - name: Gradle build (and test) for frontend + if: ${{ matrix.command == 'frontend' }} + run: | + ./gradlew :datahub-frontend:build :datahub-web-react:build --parallel env: NODE_OPTIONS: "--max-old-space-size=3072" - uses: actions/upload-artifact@v3 diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 8d56a0adf5bd5..dea4603868f8e 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -34,7 +34,6 @@ jobs: python-version: ["3.7", "3.10"] command: [ - "lint", "testQuick", "testIntegrationBatch0", 
"testIntegrationBatch1", @@ -54,6 +53,9 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Install package run: ./gradlew :metadata-ingestion:installPackageOnly + - name: Run lint alongwith testQuick + if: ${{ matrix.command == 'testQuick' }} + run: ./gradlew :metadata-ingestion:lint - name: Run metadata-ingestion tests run: ./gradlew :metadata-ingestion:${{ matrix.command }} - name: Debug info @@ -65,7 +67,6 @@ jobs: docker image ls docker system df - uses: actions/upload-artifact@v3 - if: ${{ always() && matrix.command != 'lint' }} with: name: Test Results (metadata ingestion ${{ matrix.python-version }}) path: | @@ -73,7 +74,7 @@ jobs: **/build/test-results/test/** **/junit.*.xml - name: Upload coverage to Codecov - if: ${{ always() && matrix.python-version == '3.10' && matrix.command != 'lint' }} + if: ${{ always() && matrix.python-version == '3.10' }} uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index f95fd81681a9a..6f45dcf97f1dd 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -3,11 +3,14 @@ from typing import Any, Dict from unittest import mock +import pytest from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline from tests.test_helpers import mce_helpers +pytestmark = pytest.mark.integration_batch_2 + FROZEN_TIME = "2022-02-03 07:00:00" diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 2e9c02ef759a5..e3cc6c8101650 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -19,7 +19,7 @@ from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef -pytestmark = pytest.mark.slow +pytestmark = pytest.mark.integration_batch_2 M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu10758.ap-unknown-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index b0695e3ea9954..7232d2a38da1d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -21,7 +21,7 @@ ) from tests.test_helpers import mce_helpers -pytestmark = pytest.mark.slow +pytestmark = pytest.mark.integration_batch_2 FROZEN_TIME = "2022-02-03 07:00:00" diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index dec50aefd19f0..2c77ace8b53e5 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -30,6 +30,8 @@ from tests.integration.snowflake.common import FROZEN_TIME, default_query_results from tests.test_helpers import mce_helpers +pytestmark = 
pytest.mark.integration_batch_2 + def random_email(): return ( @@ -55,7 +57,6 @@ def random_cloud_region(): ) -@pytest.mark.integration def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" @@ -183,7 +184,6 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_graph): test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 53b8519a886d3..c31867f5aa904 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -757,7 +757,7 @@ def test_tableau_no_verify(): @freeze_time(FROZEN_TIME) -@pytest.mark.slow +@pytest.mark.integration_batch_2 def test_tableau_signout_timeout(pytestconfig, tmp_path, mock_datahub_graph): enable_logging() output_file_name: str = "tableau_signout_timeout_mces.json" diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 22e5f6f91a06e..177c273c0d242 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -13,6 +13,8 @@ from tests.test_helpers import fs_helpers, mce_helpers from tests.test_helpers.docker_helpers import wait_for_port +pytestmark = pytest.mark.integration_batch_1 + FROZEN_TIME = "2021-09-23 12:00:00" data_platform = "trino" @@ -51,7 +53,6 @@ def loaded_trino(trino_runner): @freeze_time(FROZEN_TIME) -@pytest.mark.integration @pytest.mark.xfail def test_trino_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time @@ -111,7 +112,6 @@ def test_trino_ingest( @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_trino_hive_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): @@ -167,7 +167,6 @@ def test_trino_hive_ingest( @freeze_time(FROZEN_TIME) -@pytest.mark.integration def test_trino_instance_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): From 2fcced6db9d30228c421d0773c8249c889cd0d9f Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 5 Oct 2023 09:31:57 +0530 Subject: [PATCH 093/156] docs(ingest): add permissions required for athena ingestion (#8948) --- .../docs/sources/athena/athena_pre.md | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 metadata-ingestion/docs/sources/athena/athena_pre.md diff --git a/metadata-ingestion/docs/sources/athena/athena_pre.md b/metadata-ingestion/docs/sources/athena/athena_pre.md new file mode 100644 index 0000000000000..a56457d3f84fc --- /dev/null +++ b/metadata-ingestion/docs/sources/athena/athena_pre.md @@ -0,0 +1,72 @@ +### Prerequisities + +In order to execute this source, you will need to create a policy with below permissions and attach it to the the aws role or credentials used in ingestion recipe. 
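As a rough illustration of the "attach" step, the policy document below (after substituting the `${...}` placeholders) could be created and attached to an ingestion role with boto3. This is only a sketch; the policy name, file path, and role name are hypothetical.

```python
import boto3

iam = boto3.client("iam")

# Create the policy from the JSON document shown below,
# saved locally after filling in the ${...} placeholders.
with open("athena_ingestion_policy.json") as f:
    policy = iam.create_policy(
        PolicyName="DataHubAthenaIngestion",  # hypothetical policy name
        PolicyDocument=f.read(),
    )

# Attach it to the role whose credentials the ingestion recipe uses (hypothetical role name).
iam.attach_role_policy(
    RoleName="datahub-athena-ingestion",
    PolicyArn=policy["Policy"]["Arn"],
)
```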
+ +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "VisualEditor0", + "Effect": "Allow", + "Action": [ + "athena:GetTableMetadata", + "athena:StartQueryExecution", + "athena:GetQueryResults", + "athena:GetDatabase", + "athena:ListDataCatalogs", + "athena:GetDataCatalog", + "athena:ListQueryExecutions", + "athena:GetWorkGroup", + "athena:StopQueryExecution", + "athena:GetQueryResultsStream", + "athena:ListDatabases", + "athena:GetQueryExecution", + "athena:ListTableMetadata", + "athena:BatchGetQueryExecution", + "glue:GetTables", + "glue:GetDatabases", + "glue:GetTable", + "glue:GetDatabase", + "glue:SearchTables", + "glue:GetTableVersions", + "glue:GetTableVersion", + "glue:GetPartition", + "glue:GetPartitions", + "s3:GetObject", + "s3:ListBucket", + "s3:GetBucketLocation", + ], + "Resource": [ + "arn:aws:athena:${region-id}:${account-id}:datacatalog/*", + "arn:aws:athena:${region-id}:${account-id}:workgroup/*", + "arn:aws:glue:${region-id}:${account-id}:tableVersion/*/*/*", + "arn:aws:glue:${region-id}:${account-id}:table/*/*", + "arn:aws:glue:${region-id}:${account-id}:catalog", + "arn:aws:glue:${region-id}:${account-id}:database/*", + "arn:aws:s3:::${datasets-bucket}", + "arn:aws:s3:::${datasets-bucket}/*" + ] + }, + { + "Sid": "VisualEditor1", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:ListBucketMultipartUploads", + "s3:AbortMultipartUpload", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:ListMultipartUploadParts" + ], + "Resource": [ + "arn:aws:s3:::${athena-query-result-bucket}/*", + "arn:aws:s3:::${athena-query-result-bucket}" + ] + }, + ] +} +``` + +Replace `${var}` with appropriate values as per your athena setup. \ No newline at end of file From 6310e51eb09711e98d86625578127349c5144c66 Mon Sep 17 00:00:00 2001 From: Jinlin Yang <86577891+jinlintt@users.noreply.github.com> Date: Wed, 4 Oct 2023 21:03:31 -0700 Subject: [PATCH 094/156] feat(ingestion/dynamodb): implement pagination for list_tables (#8910) --- .../app/ingest/source/builder/sources.json | 4 +- .../docs/sources/dynamodb/dynamodb_post.md | 13 ++- .../docs/sources/dynamodb/dynamodb_pre.md | 6 +- .../docs/sources/dynamodb/dynamodb_recipe.yml | 16 ++-- .../ingestion/source/dynamodb/dynamodb.py | 85 +++++++++++-------- 5 files changed, 65 insertions(+), 59 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 1bd5b6f1f768b..b18384909c33f 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -130,7 +130,7 @@ "name": "dynamodb", "displayName": "DynamoDB", "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", - "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # User could use the below option to provide a list of primary keys of a table in dynamodb format,\n # those items from given primary keys will be included when we scan the table.\n # For each table we can retrieve up to 16 MB of data, which can contain as many as 100 items.\n # We'll enforce the the primary keys list size not to exceed 100\n # The total items we'll try to retrieve in these two scenarios:\n # 1. If user don't specify include_table_item: we'll retrieve up to 100 items\n # 2. 
If user specifies include_table_item: we'll retrieve up to 100 items plus user specified items in\n # the table, with a total not more than 200 items\n # include_table_item:\n # table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]" + "recipe": "source:\n type: dynamodb\n config:\n platform_instance: \"AWS_ACCOUNT_ID\"\n aws_access_key_id : '${AWS_ACCESS_KEY_ID}'\n aws_secret_access_key : '${AWS_SECRET_ACCESS_KEY}'\n # If there are items that have most representative fields of the table, users could use the\n # `include_table_item` option to provide a list of primary keys of the table in dynamodb format.\n # For each `region.table`, the list of primary keys can be at most 100.\n # We include these items in addition to the first 100 items in the table when we scan it.\n # include_table_item:\n # region.table_name:\n # [\n # {\n # 'partition_key_name': { 'attribute_type': 'attribute_value' },\n # 'sort_key_name': { 'attribute_type': 'attribute_value' },\n # },\n # ]" }, { "urn": "urn:li:dataPlatform:glue", @@ -223,4 +223,4 @@ "docsUrl": "https://datahubproject.io/docs/metadata-ingestion/", "recipe": "source:\n type: \n config:\n # Source-type specifics config\n " } -] \ No newline at end of file +] diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md index 7f9a0324c7bc6..a1c0a6e2d4d21 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_post.md @@ -1,21 +1,18 @@ -## Limitations - -For each region, the list table operation returns maximum number 100 tables, we need to further improve it by implementing pagination for listing tables - ## Advanced Configurations ### Using `include_table_item` config -If there are items that have most representative fields of the table, user could use the `include_table_item` option to provide a list of primary keys of a table in dynamodb format, those items from given primary keys will be included when we scan the table. +If there are items that have most representative fields of the table, users could use the `include_table_item` option to provide a list of primary keys of the table in dynamodb format. We include these items in addition to the first 100 items in the table when we scan it. 
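The keys must use DynamoDB's typed attribute-value encoding because the connector passes them through to DynamoDB's `BatchGetItem` API (see the `batch_get_item` call in the source changes below). A minimal boto3 sketch of the equivalent call, with illustrative region, table, and key values only:

```python
import boto3

# Illustrative values; substitute your own region, table name, and key attributes.
client = boto3.client("dynamodb", region_name="us-west-2")

response = client.batch_get_item(
    RequestItems={
        "Reply": {
            "Keys": [
                {
                    "Id": {"S": "Amazon DynamoDB#DynamoDB Thread 1"},
                    "ReplyDateTime": {"S": "2015-09-22T19:58:22.947Z"},
                }
            ]
        }
    }
)
items = response.get("Responses", {}).get("Reply", [])
print(f"retrieved {len(items)} item(s)")
```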
-Take [AWS DynamoDB Developer Guide Example tables and data](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AppendixSampleTables.html) as an example, if user has a table `Reply` with composite primary key `Id` and `ReplyDateTime`, user can use `include_table_item` to include 2 items as following: +Take [AWS DynamoDB Developer Guide Example tables and data](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AppendixSampleTables.html) as an example, if a account has a table `Reply` in the `us-west-2` region with composite primary key `Id` and `ReplyDateTime`, users can use `include_table_item` to include 2 items as following: Example: ```yml -# put the table name and composite key in DynamoDB format +# The table name should be in the format of region.table_name +# The primary keys should be in the DynamoDB format include_table_item: - Reply: + us-west-2.Reply: [ { "ReplyDateTime": { "S": "2015-09-22T19:58:22.947Z" }, diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md index a48e8d5be04aa..598d0ecdb3786 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md @@ -1,8 +1,8 @@ ### Prerequisities -In order to execute this source, you will need to create access key and secret keys that have DynamoDB read access. You can create these policies and attach to your account or can ask your account admin to attach these policies to your account. +In order to execute this source, you need to attach the `AmazonDynamoDBReadOnlyAccess` policy to a user in your AWS account. Then create an API access key and secret for the user. -For access key permissions, you can create a policy with permissions below and attach to your account, you can find more details in [Managing access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) +For a user to be able to create API access key, it needs the following access key permissions. Your AWS account admin can create a policy with these permissions and attach to the user, you can find more details in [Managing access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) ```json { @@ -22,5 +22,3 @@ For access key permissions, you can create a policy with permissions below and a ] } ``` - -For DynamoDB read access, you can simply attach AWS managed policy `AmazonDynamoDBReadOnlyAccess` to your account, you can find more details in [Attaching a policy to an IAM user group](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_groups_manage_attach-policy.html) diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml index bd41637907b5c..4f4edc9a7d496 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml @@ -4,16 +4,14 @@ source: platform_instance: "AWS_ACCOUNT_ID" aws_access_key_id: "${AWS_ACCESS_KEY_ID}" aws_secret_access_key: "${AWS_SECRET_ACCESS_KEY}" - # User could use the below option to provide a list of primary keys of a table in dynamodb format, - # those items from given primary keys will be included when we scan the table. - # For each table we can retrieve up to 16 MB of data, which can contain as many as 100 items. 
- # We'll enforce the the primary keys list size not to exceed 100 - # The total items we'll try to retrieve in these two scenarios: - # 1. If user don't specify include_table_item: we'll retrieve up to 100 items - # 2. If user specifies include_table_item: we'll retrieve up to 100 items plus user specified items in - # the table, with a total not more than 200 items + # + # If there are items that have most representative fields of the table, users could use the + # `include_table_item` option to provide a list of primary keys of the table in dynamodb format. + # For each `region.table`, the list of primary keys can be at most 100. + # We include these items in addition to the first 100 items in the table when we scan it. + # # include_table_item: - # table_name: + # region.table_name: # [ # { # "partition_key_name": { "attribute_type": "attribute_value" }, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py index 6b7c118373673..d7f3dfb9279fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py @@ -1,5 +1,5 @@ import logging -from dataclasses import field +from dataclasses import dataclass, field from typing import Any, Counter, Dict, Iterable, List, Optional, Type, Union import boto3 @@ -79,12 +79,13 @@ class DynamoDBConfig(DatasetSourceConfigMixin, StatefulIngestionConfigBase): table_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for tables to filter in ingestion.", + description="Regex patterns for tables to filter in ingestion. The table name format is 'region.table'", ) # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None +@dataclass class DynamoDBSourceReport(StaleEntityRemovalSourceReport): filtered: List[str] = field(default_factory=list) @@ -175,39 +176,30 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: # traverse databases in sorted order so output is consistent for region in dynamodb_regions: - try: - # create a new dynamodb client for each region, - # it seems for one client we could only list the table of one specific region, - # the list_tables() method don't take any config that related to region - # TODO: list table returns maximum number 100, need to implement pagination here - dynamodb_client = boto3.client( - "dynamodb", - region_name=region, - aws_access_key_id=self.config.aws_access_key_id - if self.config.aws_access_key_id - else None, - aws_secret_access_key=self.config.aws_secret_access_key.get_secret_value() - if self.config.aws_secret_access_key - else None, - ) - table_names: List[str] = dynamodb_client.list_tables()["TableNames"] - except Exception as ex: - # TODO: If regions is config input then this would be self.report.report_warning, - # we can create dynamodb client to take aws region or regions as user input - logger.info(f"exception happen in region {region}, skipping: {ex}") - continue - for table_name in sorted(table_names): - if not self.config.table_pattern.allowed(table_name): + logger.info(f"Processing region {region}") + # create a new dynamodb client for each region, + # it seems for one client we could only list the table of one specific region, + # the list_tables() method don't take any config that related to region + dynamodb_client = boto3.client( + "dynamodb", + region_name=region, + 
aws_access_key_id=self.config.aws_access_key_id, + aws_secret_access_key=self.config.aws_secret_access_key.get_secret_value(), + ) + + for table_name in self._list_tables(dynamodb_client): + dataset_name = f"{region}.{table_name}" + if not self.config.table_pattern.allowed(dataset_name): + logger.debug(f"skipping table: {dataset_name}") + self.report.report_dropped(dataset_name) continue + + logger.debug(f"Processing table: {dataset_name}") table_info = dynamodb_client.describe_table(TableName=table_name)[ "Table" ] account_id = table_info["TableArn"].split(":")[4] - if not self.config.table_pattern.allowed(table_name): - self.report.report_dropped(table_name) - continue platform_instance = self.config.platform_instance or account_id - dataset_name = f"{region}.{table_name}" dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, platform_instance=platform_instance, @@ -222,7 +214,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) primary_key_dict = self.extract_primary_key_from_key_schema(table_info) table_schema = self.construct_schema_from_dynamodb( - dynamodb_client, table_name + dynamodb_client, region, table_name ) schema_metadata = self.construct_schema_metadata( table_name, @@ -254,9 +246,25 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: aspect=platform_instance_aspect, ).as_workunit() + def _list_tables( + self, + dynamodb_client: BaseClient, + ) -> Iterable[str]: + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/paginator/ListTables.html + try: + for page in dynamodb_client.get_paginator("list_tables").paginate(): + table_names = page.get("TableNames") + if table_names: + yield from table_names + except Exception as ex: + # TODO: If regions is config input then this would be self.report.report_warning, + # we can create dynamodb client to take aws region or regions as user input + logger.info(f"Exception happened while listing tables, skipping: {ex}") + def construct_schema_from_dynamodb( self, dynamodb_client: BaseClient, + region: str, table_name: str, ) -> Dict[str, SchemaDescription]: """ @@ -275,7 +283,7 @@ def construct_schema_from_dynamodb( The MaxItems is the total number of items to return, and PageSize is the size of each page, we are assigning same value to these two config. 
If MaxItems is more than PageSize then we expect MaxItems / PageSize pages in response_iterator will return """ - self.include_table_item_to_schema(dynamodb_client, table_name, schema) + self.include_table_item_to_schema(dynamodb_client, region, table_name, schema) response_iterator = paginator.paginate( TableName=table_name, PaginationConfig={ @@ -294,33 +302,38 @@ def construct_schema_from_dynamodb( def include_table_item_to_schema( self, dynamodb_client: Any, + region: str, table_name: str, schema: Dict[str, SchemaDescription], ) -> None: """ - It will look up in the config include_table_item dict to see if the current table name exists as key, + It will look up in the config include_table_item dict to see if "region.table_name" exists as key, if it exists then get the items by primary key from the table and put it to schema """ if self.config.include_table_item is None: return - if table_name not in self.config.include_table_item.keys(): + dataset_name = f"{region}.{table_name}" + if dataset_name not in self.config.include_table_item.keys(): return - primary_key_list = self.config.include_table_item.get(table_name) + primary_key_list = self.config.include_table_item.get(dataset_name) assert isinstance(primary_key_list, List) if len(primary_key_list) > MAX_PRIMARY_KEYS_SIZE: logger.info( - f"the provided primary keys list size exceeded the max size for table {table_name}, we'll only process the first {MAX_PRIMARY_KEYS_SIZE} items" + f"the provided primary keys list size exceeded the max size for table {dataset_name}, we'll only process the first {MAX_PRIMARY_KEYS_SIZE} items" ) primary_key_list = primary_key_list[0:MAX_PRIMARY_KEYS_SIZE] items = [] response = dynamodb_client.batch_get_item( RequestItems={table_name: {"Keys": primary_key_list}} - ).get("Responses", None) + ).get("Responses") if response is None: logger.error( f"failed to retrieve item from table {table_name} by the given key {primary_key_list}" ) return + logger.debug( + f"successfully retrieved {len(primary_key_list)} items based on supplied primary key list" + ) items = response.get(table_name) self.construct_schema_from_items(items, schema) From c9309ff1579e31c79d2d8e764a89f7c5e3ff483c Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Thu, 5 Oct 2023 09:07:12 -0700 Subject: [PATCH 095/156] feat(ci): enable ci to run on PR-s targeting all branches (#8933) --- .github/workflows/airflow-plugin.yml | 2 +- .github/workflows/build-and-test.yml | 11 +++-------- .github/workflows/check-datahub-jars.yml | 9 ++------- .github/workflows/close-stale-issues.yml | 4 +++- .github/workflows/code-checks.yml | 13 ++++--------- .github/workflows/docker-postgres-setup.yml | 3 +-- .github/workflows/docker-unified.yml | 7 +++---- .github/workflows/documentation.yml | 2 +- .github/workflows/lint-actions.yml | 4 +++- .github/workflows/metadata-ingestion.yml | 2 +- .github/workflows/metadata-io.yml | 2 +- .github/workflows/spark-smoke-test.yml | 2 +- 12 files changed, 24 insertions(+), 37 deletions(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index a250bddcc16d1..54042d104d906 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -10,7 +10,7 @@ on: - "metadata-models/**" pull_request: branches: - - master + - "**" paths: - ".github/**" - "metadata-ingestion-modules/airflow-plugin/**" diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 96b9bb2a14933..25f3957e8f086 100644 --- 
a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -8,7 +8,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docs/**" - "**.md" @@ -24,17 +24,12 @@ jobs: strategy: fail-fast: false matrix: - command: - [ + command: [ # metadata-ingestion and airflow-plugin each have dedicated build jobs "except_metadata_ingestion", "frontend" ] - timezone: - [ - "UTC", - "America/New_York", - ] + timezone: ["UTC", "America/New_York"] runs-on: ubuntu-latest timeout-minutes: 60 steps: diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 841a9ed5f9bc7..9a17a70e7f8d4 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -10,7 +10,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docker/**" - "docs/**" @@ -28,12 +28,7 @@ jobs: max-parallel: 1 fail-fast: false matrix: - command: - [ - "datahub-client", - "datahub-protobuf", - "spark-lineage" - ] + command: ["datahub-client", "datahub-protobuf", "spark-lineage"] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index a7809087702ac..98e3041f28804 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -18,7 +18,9 @@ jobs: days-before-issue-stale: 30 days-before-issue-close: 30 stale-issue-label: "stale" - stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io" + stale-issue-message: + "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io.\ + \ For feature requests please use https://feature-requests.datahubproject.io" close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale." 
days-before-pr-stale: -1 days-before-pr-close: -1 diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 6ce19a5b4616e..e12971b8a6208 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -10,7 +10,7 @@ on: - ".github/workflows/code-checks.yml" pull_request: branches: - - master + - "**" paths: - "metadata-io/**" - "datahub-web-react/**" @@ -21,17 +21,12 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true - jobs: code_check: strategy: fail-fast: false matrix: - command: - [ - "check_event_type.py", - "check_policies.py" - ] + command: ["check_event_type.py", "check_policies.py"] name: run code checks runs-on: ubuntu-latest steps: @@ -43,5 +38,5 @@ jobs: with: python-version: "3.10" - name: run check ${{ matrix.command }} - run: | - python .github/scripts/${{ matrix.command }} \ No newline at end of file + run: |- + python .github/scripts/${{ matrix.command }} diff --git a/.github/workflows/docker-postgres-setup.yml b/.github/workflows/docker-postgres-setup.yml index a5d421d4b7ff5..fda4349f90bf7 100644 --- a/.github/workflows/docker-postgres-setup.yml +++ b/.github/workflows/docker-postgres-setup.yml @@ -8,7 +8,7 @@ on: - ".github/workflows/docker-postgres-setup.yml" pull_request: branches: - - master + - "**" paths: - "docker/postgres-setup/**" - ".github/workflows/docker-postgres-setup.yml" @@ -61,4 +61,3 @@ jobs: context: . file: ./docker/postgres-setup/Dockerfile platforms: linux/amd64,linux/arm64 - diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 2aae6bf51529d..8666a5e2e2171 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -8,7 +8,7 @@ on: - "**.md" pull_request: branches: - - master + - "**" paths-ignore: - "docs/**" - "**.md" @@ -545,7 +545,6 @@ jobs: id: tag run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT - datahub_ingestion_slim_build: name: Build and Push DataHub Ingestion Docker Images runs-on: ubuntu-latest @@ -809,8 +808,8 @@ jobs: DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }} - ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5' - ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml' + ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5" + ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml" run: | ./smoke-test/run-quickstart.sh - name: sleep 60s diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 68432a4feb13d..ebe2990f3a3cd 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -3,7 +3,7 @@ name: documentation on: pull_request: branches: - - master + - "**" push: branches: - master diff --git a/.github/workflows/lint-actions.yml b/.github/workflows/lint-actions.yml index b285e46da4857..6f34bf292bf51 100644 --- a/.github/workflows/lint-actions.yml +++ b/.github/workflows/lint-actions.yml @@ -2,8 +2,10 @@ name: Lint actions on: pull_request: paths: - - '.github/workflows/**' + - 
".github/workflows/**" + branches: + - "**" jobs: actionlint: runs-on: ubuntu-latest diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index dea4603868f8e..699ca330ce0ac 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -9,7 +9,7 @@ on: - "metadata-models/**" pull_request: branches: - - master + - "**" paths: - ".github/**" - "metadata-ingestion/**" diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index e37ddd0ce4e86..48f230ce14c8d 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -10,7 +10,7 @@ on: - "metadata-io/**" pull_request: branches: - - master + - "**" paths: - "**/*.gradle" - "li-utils/**" diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index b2482602e7548..541b2019b93ef 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -12,7 +12,7 @@ on: - ".github/workflows/spark-smoke-test.yml" pull_request: branches: - - master + - "**" paths: - "metadata_models/**" - "metadata-integration/java/datahub-client/**" From 3cede10ab30e22dcad286bd42bcd154732e40942 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 5 Oct 2023 13:29:47 -0400 Subject: [PATCH 096/156] feat(ingest/dbt): support `use_compiled_code` and `test_warnings_are_errors` (#8956) --- .../datahub/configuration/source_common.py | 2 +- ...ation.py => validate_field_deprecation.py} | 14 +++++-- .../ingestion/source/dbt/dbt_common.py | 41 ++++++++++++++----- .../src/datahub/ingestion/source/file.py | 2 +- .../ingestion/source/powerbi/config.py | 2 +- .../ingestion/source/redshift/config.py | 2 +- .../src/datahub/ingestion/source/s3/config.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/sql/sql_config.py | 2 +- .../src/datahub/ingestion/source/tableau.py | 2 +- .../tests/unit/test_pydantic_validators.py | 2 +- 11 files changed, 51 insertions(+), 22 deletions(-) rename metadata-ingestion/src/datahub/configuration/{pydantic_field_deprecation.py => validate_field_deprecation.py} (74%) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 37b93f3e598e1..a9f891ddb7b1e 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -4,7 +4,7 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.metadata.schema_classes import FabricTypeClass DEFAULT_ENV = FabricTypeClass.PROD diff --git a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py similarity index 74% rename from metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py rename to metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py index ed82acb594ed7..6134c4dab4817 100644 --- a/metadata-ingestion/src/datahub/configuration/pydantic_field_deprecation.py +++ b/metadata-ingestion/src/datahub/configuration/validate_field_deprecation.py @@ -1,20 +1,28 @@ import warnings -from typing import Optional, Type +from typing import Any, Optional, Type 
import pydantic from datahub.configuration.common import ConfigurationWarning from datahub.utilities.global_warning_util import add_global_warning +_unset = object() -def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod: + +def pydantic_field_deprecated( + field: str, + warn_if_value_is_not: Any = _unset, + message: Optional[str] = None, +) -> classmethod: if message: output = message else: output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config." def _validate_deprecated(cls: Type, values: dict) -> dict: - if field in values: + if field in values and ( + warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not + ): add_global_warning(output) warnings.warn(output, ConfigurationWarning, stacklevel=2) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index f9b71892975b4..0f5c08eb6ac54 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -18,8 +18,8 @@ ConfigurationError, LineageConfig, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext @@ -214,7 +214,9 @@ class DBTCommonConfig( default=False, description="Use model identifier instead of model name if defined (if not, default to model name).", ) - _deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers") + _deprecate_use_identifiers = pydantic_field_deprecated( + "use_identifiers", warn_if_value_is_not=False + ) entities_enabled: DBTEntitiesEnabled = Field( DBTEntitiesEnabled(), @@ -278,6 +280,14 @@ class DBTCommonConfig( description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. 
" "If `target_platform` is Snowflake, the default is True.", ) + use_compiled_code: bool = Field( + default=False, + description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.", + ) + test_warnings_are_errors: bool = Field( + default=False, + description="When enabled, dbt test warnings will be treated as failures.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: @@ -811,7 +821,7 @@ def _make_assertion_from_test( mce_builder.make_schema_field_urn(upstream_urn, column_name) ], nativeType=node.name, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), ), @@ -825,7 +835,7 @@ def _make_assertion_from_test( dataset=upstream_urn, scope=DatasetAssertionScopeClass.DATASET_ROWS, operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code if node.compiled_code else node.raw_code, + logic=node.compiled_code or node.raw_code, nativeType=node.name, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), @@ -856,6 +866,10 @@ def _make_assertion_result_from_test( result=AssertionResultClass( type=AssertionResultTypeClass.SUCCESS if test_result.status == "pass" + or ( + not self.config.test_warnings_are_errors + and test_result.status == "warn" + ) else AssertionResultTypeClass.FAILURE, nativeResults=test_result.native_results, ), @@ -1007,8 +1021,8 @@ def create_platform_mces( aspects.append(upstream_lineage_class) # add view properties aspect - if node.raw_code and node.language == "sql": - view_prop_aspect = self._create_view_properties_aspect(node) + view_prop_aspect = self._create_view_properties_aspect(node) + if view_prop_aspect: aspects.append(view_prop_aspect) # emit subtype mcp @@ -1133,14 +1147,21 @@ def _create_dataset_properties_aspect( def get_external_url(self, node: DBTNode) -> Optional[str]: pass - def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass: + def _create_view_properties_aspect( + self, node: DBTNode + ) -> Optional[ViewPropertiesClass]: + view_logic = ( + node.compiled_code if self.config.use_compiled_code else node.raw_code + ) + + if node.language != "sql" or not view_logic: + return None + materialized = node.materialization in {"table", "incremental", "snapshot"} - # this function is only called when raw sql is present. 
assert is added to satisfy lint checks - assert node.raw_code is not None view_properties = ViewPropertiesClass( materialized=materialized, viewLanguage="SQL", - viewLogic=node.raw_code, + viewLogic=view_logic, ) return view_properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/file.py b/metadata-ingestion/src/datahub/ingestion/source/file.py index de61fa8481c58..590aa59f7b5b6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/file.py +++ b/metadata-ingestion/src/datahub/ingestion/source/file.py @@ -16,7 +16,7 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index a8c7e48f3785c..96729f4c60c6c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,8 +9,8 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 93850607e551e..804a14b0fe1cf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -7,8 +7,8 @@ from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.sql.postgres import BasePostgresConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index f1dd622efb746..9b5296f0b9dd5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -5,8 +5,8 @@ from pydantic.fields import Field from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from 
datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 1626f86b92545..8873038079bad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -19,9 +19,9 @@ from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER import datahub.emitter.mce_builder as builder -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 8f1e04b915f3b..677d32c8bac08 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -7,8 +7,8 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulStaleMetadataRemovalConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 6214cba342622..e347cd26d245a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -37,11 +37,11 @@ ConfigModel, ConfigurationError, ) -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated from datahub.configuration.source_common import ( DatasetLineageProviderConfigBase, DatasetSourceConfigMixin, ) +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( ContainerKey, diff --git a/metadata-ingestion/tests/unit/test_pydantic_validators.py b/metadata-ingestion/tests/unit/test_pydantic_validators.py index 07d86043a35bf..3e9ec6cbaf357 100644 --- a/metadata-ingestion/tests/unit/test_pydantic_validators.py +++ b/metadata-ingestion/tests/unit/test_pydantic_validators.py @@ -4,7 +4,7 @@ from pydantic import ValidationError from datahub.configuration.common import ConfigModel -from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import 
pydantic_renamed_field from datahub.utilities.global_warning_util import get_global_warnings From debac3cf5c31b471a5a82da8d18fb8303cc8b9d0 Mon Sep 17 00:00:00 2001 From: Patrick Franco Braz Date: Thu, 5 Oct 2023 17:47:10 -0300 Subject: [PATCH 097/156] refactor(boot): increases wait timeout for servlets initialization (#8947) Co-authored-by: RyanHolstien --- .../configuration/src/main/resources/application.yml | 3 +++ .../metadata/boot/OnBootApplicationListener.java | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 4be31b2b6bb15..4dfd96ac75c6c 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -276,6 +276,9 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. Deprecating in favor of running through SystemUpdate + servlets: + waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize + systemUpdate: initialBackOffMs: ${BOOTSTRAP_SYSTEM_UPDATE_INITIAL_BACK_OFF_MILLIS:5000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java index 980cafaceae27..032b934a7ba87 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java @@ -15,15 +15,18 @@ import org.apache.http.impl.client.HttpClients; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.context.event.EventListener; import org.springframework.stereotype.Component; import org.springframework.web.context.WebApplicationContext; +import org.springframework.context.annotation.Configuration; /** * Responsible for coordinating starting steps that happen before the application starts up. 
*/ +@Configuration @Slf4j @Component public class OnBootApplicationListener { @@ -44,6 +47,8 @@ public class OnBootApplicationListener { @Qualifier("configurationProvider") private ConfigurationProvider provider; + @Value("${bootstrap.servlets.waitTimeout}") + private int _servletsWaitTimeout; @EventListener(ContextRefreshedEvent.class) public void onApplicationEvent(@Nonnull ContextRefreshedEvent event) { @@ -62,7 +67,7 @@ public void onApplicationEvent(@Nonnull ContextRefreshedEvent event) { public Runnable isSchemaRegistryAPIServletReady() { return () -> { final HttpGet request = new HttpGet(provider.getKafka().getSchemaRegistry().getUrl()); - int timeouts = 30; + int timeouts = _servletsWaitTimeout; boolean openAPIServeletReady = false; while (!openAPIServeletReady && timeouts > 0) { try { @@ -79,7 +84,7 @@ public Runnable isSchemaRegistryAPIServletReady() { timeouts--; } if (!openAPIServeletReady) { - log.error("Failed to bootstrap DataHub, OpenAPI servlet was not ready after 30 seconds"); + log.error("Failed to bootstrap DataHub, OpenAPI servlet was not ready after {} seconds", timeouts); System.exit(1); } else { _bootstrapManager.start(); From 26bc039b967d3a62a7079522b702e97ed8ad8d27 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 5 Oct 2023 23:23:15 -0400 Subject: [PATCH 098/156] fix(ingest/unity): Remove metastore from ingestion and urns; standardize platform instance; add notebook filter (#8943) --- docs/how/updating-datahub.md | 5 + .../src/datahub/emitter/mcp_builder.py | 10 +- .../datahub/ingestion/source/unity/config.py | 45 ++++++++- .../datahub/ingestion/source/unity/proxy.py | 16 +-- .../ingestion/source/unity/proxy_types.py | 19 ++-- .../datahub/ingestion/source/unity/source.py | 99 ++++++++++++------- 6 files changed, 145 insertions(+), 49 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 4df8d435cf1c4..5d0ad5eaf8f7e 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -9,6 +9,11 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. +- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. +If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. +Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: +`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. 
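For reference, a minimal sketch of a Unity Catalog recipe with the new flag disabled; the connection details are placeholders, and stateful ingestion is shown because, per the note above, it lets DataHub clean up the old urns automatically:

```yml
source:
  type: unity-catalog
  config:
    workspace_url: https://my-workspace.cloud.databricks.com
    token: "${DATABRICKS_TOKEN}"
    include_metastore: false
    stateful_ingestion:
      enabled: true
```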
### Potential Downtime diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 06f689dfd317b..65e0c0d6ba60d 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -94,7 +94,15 @@ class MetastoreKey(ContainerKey): metastore: str -class CatalogKey(MetastoreKey): +class CatalogKeyWithMetastore(MetastoreKey): + catalog: str + + +class UnitySchemaKeyWithMetastore(CatalogKeyWithMetastore): + unity_schema: str + + +class CatalogKey(ContainerKey): catalog: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index a49c789a82f27..f259fa260f653 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -1,3 +1,4 @@ +import logging import os from datetime import datetime, timedelta, timezone from typing import Any, Dict, Optional @@ -21,6 +22,9 @@ OperationConfig, is_profiling_enabled, ) +from datahub.utilities.global_warning_util import add_global_warning + +logger = logging.getLogger(__name__) class UnityCatalogProfilerConfig(ConfigModel): @@ -97,9 +101,25 @@ class UnityCatalogSourceConfig( description="Name of the workspace. Default to deployment name present in workspace_url", ) + include_metastore: bool = pydantic.Field( + default=True, + description=( + "Whether to ingest the workspace's metastore as a container and include it in all urns." + " Changing this will affect the urns of all entities in the workspace." + " This will be disabled by default in the future," + " so it is recommended to set this to `False` for new ingestions." + " If you have an existing unity catalog ingestion, you'll want to avoid duplicates by soft deleting existing data." + " If stateful ingestion is enabled, running with `include_metastore: false` should be sufficient." + " Otherwise, we recommend deleting via the cli: `datahub delete --platform databricks` and re-ingesting with `include_metastore: false`." + ), + ) + ingest_data_platform_instance_aspect: Optional[bool] = pydantic.Field( default=False, - description="Option to enable/disable ingestion of the data platform instance aspect. The default data platform instance id for a dataset is workspace_name", + description=( + "Option to enable/disable ingestion of the data platform instance aspect." + " The default data platform instance id for a dataset is workspace_name" + ), ) _only_ingest_assigned_metastore_removed = pydantic_removed_field( @@ -122,6 +142,16 @@ class UnityCatalogSourceConfig( default=AllowDenyPattern.allow_all(), description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in `catalog.schema.table` format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex `Customer\\.public\\.customer.*`.", ) + + notebook_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description=( + "Regex patterns for notebooks to filter in ingestion, based on notebook *path*." + " Specify regex to match the entire notebook path in `//.../` format." + " e.g. to match all notebooks in the root Shared directory, use the regex `/Shared/.*`." + ), + ) + domain: Dict[str, AllowDenyPattern] = Field( default=dict(), description='Attach domains to catalogs, schemas or tables during ingestion using regex patterns. 
Domain key can be a guid like *urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba* or a string like "Marketing".) If you provide strings, then datahub will attempt to resolve this name to a guid, and will error out if this fails. There can be multiple domain keys specified.', @@ -182,3 +212,16 @@ def workspace_url_should_start_with_http_scheme(cls, workspace_url: str) -> str: "Workspace URL must start with http scheme. e.g. https://my-workspace.cloud.databricks.com" ) return workspace_url + + @pydantic.validator("include_metastore") + def include_metastore_warning(cls, v: bool) -> bool: + if v: + msg = ( + "`include_metastore` is enabled." + " This is not recommended and will be disabled by default in the future, which is a breaking change." + " All databricks urns will change if you re-ingest with this disabled." + " We recommend soft deleting all databricks data and re-ingesting with `include_metastore` set to `False`." + ) + logger.warning(msg) + add_global_warning(msg) + return v diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 2401f1c3d163c..529d9e7b563a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -97,14 +97,13 @@ def __init__( self.report = report def check_basic_connectivity(self) -> bool: - self._workspace_client.metastores.summary() - return True + return bool(self._workspace_client.catalogs.list()) def assigned_metastore(self) -> Metastore: response = self._workspace_client.metastores.summary() return self._create_metastore(response) - def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: response = self._workspace_client.catalogs.list() if not response: logger.info("Catalogs not found") @@ -247,7 +246,7 @@ def table_lineage( for item in response.get("upstreams") or []: if "tableInfo" in item: table_ref = TableReference.create_from_lineage( - item["tableInfo"], table.schema.catalog.metastore.id + item["tableInfo"], table.schema.catalog.metastore ) if table_ref: table.upstreams[table_ref] = {} @@ -276,7 +275,7 @@ def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None ) for item in response.get("upstream_cols", []): table_ref = TableReference.create_from_lineage( - item, table.schema.catalog.metastore.id + item, table.schema.catalog.metastore ) if table_ref: table.upstreams.setdefault(table_ref, {}).setdefault( @@ -305,10 +304,13 @@ def _create_metastore( comment=None, ) - def _create_catalog(self, metastore: Metastore, obj: CatalogInfo) -> Catalog: + def _create_catalog( + self, metastore: Optional[Metastore], obj: CatalogInfo + ) -> Catalog: + catalog_name = self._escape_sequence(obj.name) return Catalog( name=obj.name, - id=f"{metastore.id}.{self._escape_sequence(obj.name)}", + id=f"{metastore.id}.{catalog_name}" if metastore else catalog_name, metastore=metastore, comment=obj.comment, owner=obj.owner, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 54ac2e90d7c7e..18ac2475b51e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -92,7 +92,7 @@ class Metastore(CommonProperty): @dataclass class Catalog(CommonProperty): - metastore: Metastore + metastore: 
Optional[Metastore] owner: Optional[str] type: CatalogType @@ -130,7 +130,7 @@ class ServicePrincipal: @dataclass(frozen=True, order=True) class TableReference: - metastore: str + metastore: Optional[str] catalog: str schema: str table: str @@ -138,17 +138,21 @@ class TableReference: @classmethod def create(cls, table: "Table") -> "TableReference": return cls( - table.schema.catalog.metastore.id, + table.schema.catalog.metastore.id + if table.schema.catalog.metastore + else None, table.schema.catalog.name, table.schema.name, table.name, ) @classmethod - def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReference"]: + def create_from_lineage( + cls, d: dict, metastore: Optional[Metastore] + ) -> Optional["TableReference"]: try: return cls( - metastore, + metastore.id if metastore else None, d["catalog_name"], d["schema_name"], d.get("table_name", d["name"]), # column vs table query output @@ -158,7 +162,10 @@ def create_from_lineage(cls, d: dict, metastore: str) -> Optional["TableReferenc return None def __str__(self) -> str: - return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" + if self.metastore: + return f"{self.metastore}.{self.catalog}.{self.schema}.{self.table}" + else: + return self.qualified_table_name @property def qualified_table_name(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index f2da1aece9fd4..4f7866aee7681 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -16,10 +16,12 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( CatalogKey, + CatalogKeyWithMetastore, ContainerKey, MetastoreKey, NotebookKey, UnitySchemaKey, + UnitySchemaKeyWithMetastore, add_dataset_to_container, gen_containers, ) @@ -127,7 +129,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource): config: UnityCatalogSourceConfig unity_catalog_api_proxy: UnityCatalogApiProxy platform: str = "databricks" - platform_instance_name: str + platform_instance_name: Optional[str] def get_report(self) -> UnityCatalogReport: return self.report @@ -146,11 +148,13 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): self.external_url_base = urljoin(self.config.workspace_url, "/explore/data") # Determine the platform_instance_name - self.platform_instance_name = ( - config.workspace_name - if config.workspace_name is not None - else config.workspace_url.split("//")[1].split(".")[0] - ) + self.platform_instance_name = self.config.platform_instance + if self.config.include_metastore: + self.platform_instance_name = ( + config.workspace_name + if config.workspace_name is not None + else config.workspace_url.split("//")[1].split(".")[0] + ) if self.config.domain: self.domain_registry = DomainRegistry( @@ -247,10 +251,14 @@ def build_service_principal_map(self) -> None: def process_notebooks(self) -> Iterable[MetadataWorkUnit]: for notebook in self.unity_catalog_api_proxy.workspace_notebooks(): + if not self.config.notebook_pattern.allowed(notebook.path): + self.report.notebooks.dropped(notebook.path) + continue + self.notebooks[str(notebook.id)] = notebook - yield from self._gen_notebook_aspects(notebook) + yield from self._gen_notebook_workunits(notebook) - def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit]: + def _gen_notebook_workunits(self, notebook: 
Notebook) -> Iterable[MetadataWorkUnit]: mcps = MetadataChangeProposalWrapper.construct_many( entityUrn=self.gen_notebook_urn(notebook), aspects=[ @@ -270,7 +278,7 @@ def _gen_notebook_aspects(self, notebook: Notebook) -> Iterable[MetadataWorkUnit ), SubTypesClass(typeNames=[DatasetSubTypes.NOTEBOOK]), BrowsePathsClass(paths=notebook.path.split("/")), - # TODO: Add DPI aspect + self._create_data_platform_instance_aspect(), ], ) for mcp in mcps: @@ -296,13 +304,17 @@ def _gen_notebook_lineage(self, notebook: Notebook) -> Optional[MetadataWorkUnit ).as_workunit() def process_metastores(self) -> Iterable[MetadataWorkUnit]: - metastore = self.unity_catalog_api_proxy.assigned_metastore() - yield from self.gen_metastore_containers(metastore) + metastore: Optional[Metastore] = None + if self.config.include_metastore: + metastore = self.unity_catalog_api_proxy.assigned_metastore() + yield from self.gen_metastore_containers(metastore) yield from self.process_catalogs(metastore) + if metastore and self.config.include_metastore: + self.report.metastores.processed(metastore.id) - self.report.metastores.processed(metastore.id) - - def process_catalogs(self, metastore: Metastore) -> Iterable[MetadataWorkUnit]: + def process_catalogs( + self, metastore: Optional[Metastore] + ) -> Iterable[MetadataWorkUnit]: for catalog in self.unity_catalog_api_proxy.catalogs(metastore=metastore): if not self.config.catalog_pattern.allowed(catalog.id): self.report.catalogs.dropped(catalog.id) @@ -353,7 +365,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn operation = self._create_table_operation_aspect(table) domain = self._get_domain_aspect(dataset_name=table.ref.qualified_table_name) ownership = self._create_table_ownership_aspect(table) - data_platform_instance = self._create_data_platform_instance_aspect(table) + data_platform_instance = self._create_data_platform_instance_aspect() if self.config.include_column_lineage: self.unity_catalog_api_proxy.get_column_lineage( @@ -503,27 +515,37 @@ def gen_metastore_containers( def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]: domain_urn = self._gen_domain_urn(catalog.name) - metastore_container_key = self.gen_metastore_key(catalog.metastore) catalog_container_key = self.gen_catalog_key(catalog) yield from gen_containers( container_key=catalog_container_key, name=catalog.name, sub_types=[DatasetContainerSubTypes.CATALOG], domain_urn=domain_urn, - parent_container_key=metastore_container_key, + parent_container_key=self.gen_metastore_key(catalog.metastore) + if self.config.include_metastore and catalog.metastore + else None, description=catalog.comment, owner_urn=self.get_owner_urn(catalog.owner), external_url=f"{self.external_url_base}/{catalog.name}", ) def gen_schema_key(self, schema: Schema) -> ContainerKey: - return UnitySchemaKey( - unity_schema=schema.name, - platform=self.platform, - instance=self.config.platform_instance, - catalog=schema.catalog.name, - metastore=schema.catalog.metastore.name, - ) + if self.config.include_metastore: + assert schema.catalog.metastore + return UnitySchemaKeyWithMetastore( + unity_schema=schema.name, + platform=self.platform, + instance=self.config.platform_instance, + catalog=schema.catalog.name, + metastore=schema.catalog.metastore.name, + ) + else: + return UnitySchemaKey( + unity_schema=schema.name, + platform=self.platform, + instance=self.config.platform_instance, + catalog=schema.catalog.name, + ) def gen_metastore_key(self, metastore: Metastore) -> 
MetastoreKey: return MetastoreKey( @@ -532,13 +554,21 @@ def gen_metastore_key(self, metastore: Metastore) -> MetastoreKey: instance=self.config.platform_instance, ) - def gen_catalog_key(self, catalog: Catalog) -> CatalogKey: - return CatalogKey( - catalog=catalog.name, - metastore=catalog.metastore.name, - platform=self.platform, - instance=self.config.platform_instance, - ) + def gen_catalog_key(self, catalog: Catalog) -> ContainerKey: + if self.config.include_metastore: + assert catalog.metastore + return CatalogKeyWithMetastore( + catalog=catalog.name, + metastore=catalog.metastore.name, + platform=self.platform, + instance=self.config.platform_instance, + ) + else: + return CatalogKey( + catalog=catalog.name, + platform=self.platform, + instance=self.config.platform_instance, + ) def _gen_domain_urn(self, dataset_name: str) -> Optional[str]: domain_urn: Optional[str] = None @@ -643,15 +673,16 @@ def _create_table_ownership_aspect(self, table: Table) -> Optional[OwnershipClas return None def _create_data_platform_instance_aspect( - self, table: Table + self, ) -> Optional[DataPlatformInstanceClass]: - # Only ingest the DPI aspect if the flag is true if self.config.ingest_data_platform_instance_aspect: return DataPlatformInstanceClass( platform=make_data_platform_urn(self.platform), instance=make_dataplatform_instance_urn( self.platform, self.platform_instance_name - ), + ) + if self.platform_instance_name + else None, ) return None From ea87febd2bdf0aebf603532be9448e6435f1fea9 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 6 Oct 2023 14:36:32 +0900 Subject: [PATCH 099/156] fix: add retry for fetch_url (#8958) --- docs-website/download_historical_versions.py | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/docs-website/download_historical_versions.py b/docs-website/download_historical_versions.py index 83157edc1972c..53ee9cf1e63ef 100644 --- a/docs-website/download_historical_versions.py +++ b/docs-website/download_historical_versions.py @@ -1,6 +1,7 @@ import json import os import tarfile +import time import urllib.request repo_url = "https://api.github.com/repos/datahub-project/static-assets" @@ -16,17 +17,30 @@ def download_file(url, destination): f.write(chunk) -def fetch_urls(repo_url: str, folder_path: str, file_format: str): +def fetch_urls( + repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5 +): api_url = f"{repo_url}/contents/{folder_path}" - response = urllib.request.urlopen(api_url) - data = response.read().decode("utf-8") - urls = [ - file["download_url"] - for file in json.loads(data) - if file["name"].endswith(file_format) - ] - print(urls) - return urls + for attempt in range(max_retries + 1): + try: + response = urllib.request.urlopen(api_url) + if response.status == 403 or (500 <= response.status < 600): + raise Exception(f"HTTP Error {response.status}: {response.reason}") + data = response.read().decode("utf-8") + urls = [ + file["download_url"] + for file in json.loads(data) + if file["name"].endswith(file_format) + ] + print(urls) + return urls + except Exception as e: + if attempt < max_retries: + print(f"Attempt {attempt + 1}/{max_retries}: {e}") + time.sleep(retry_delay) + else: + print(f"Max retries reached. 
Unable to fetch data.") + raise def extract_tar_file(destination_path): From c80da8f949aea340af73c992ff6d2bd129eb55fe Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 6 Oct 2023 10:06:36 -0400 Subject: [PATCH 100/156] feat(ingest/unity): Use ThreadPoolExecutor for CLL (#8952) --- .../datahub/ingestion/source/unity/config.py | 11 +++++ .../datahub/ingestion/source/unity/proxy.py | 46 ++++++++----------- .../datahub/ingestion/source/unity/report.py | 2 + .../datahub/ingestion/source/unity/source.py | 33 +++++++++---- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index f259fa260f653..51390873712d3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -181,6 +181,17 @@ class UnityCatalogSourceConfig( description="Option to enable/disable lineage generation. Currently we have to call a rest call per column to get column level lineage due to the Databrick api which can slow down ingestion. ", ) + column_lineage_column_limit: int = pydantic.Field( + default=300, + description="Limit the number of columns to get column level lineage. ", + ) + + lineage_max_workers: int = pydantic.Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for column lineage thread pool executor. Set to 1 to disable.", + hidden_from_docs=True, + ) + include_usage_statistics: bool = Field( default=True, description="Generate usage statistics.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 529d9e7b563a5..9bcdb200f180e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -233,9 +233,7 @@ def list_lineages_by_column(self, table_name: str, column_name: str) -> dict: body={"table_name": table_name, "column_name": column_name}, ) - def table_lineage( - self, table: Table, include_entity_lineage: bool - ) -> Optional[dict]: + def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: # Lineage endpoint doesn't exists on 2.1 version try: response: dict = self.list_lineages_by_table( @@ -256,34 +254,30 @@ def table_lineage( for item in response.get("downstreams") or []: for notebook in item.get("notebookInfos") or []: table.downstream_notebooks.add(notebook["notebook_id"]) - - return response except Exception as e: - logger.error(f"Error getting lineage: {e}") - return None + logger.warning( + f"Error getting lineage on table {table.ref}: {e}", exc_info=True + ) - def get_column_lineage(self, table: Table, include_entity_lineage: bool) -> None: + def get_column_lineage(self, table: Table, column_name: str) -> None: try: - table_lineage = self.table_lineage( - table, include_entity_lineage=include_entity_lineage + response: dict = self.list_lineages_by_column( + table_name=table.ref.qualified_table_name, + column_name=column_name, ) - if table_lineage: - for column in table.columns: - response: dict = self.list_lineages_by_column( - table_name=table.ref.qualified_table_name, - column_name=column.name, - ) - for item in response.get("upstream_cols", []): - table_ref = TableReference.create_from_lineage( - item, table.schema.catalog.metastore - ) - if table_ref: - table.upstreams.setdefault(table_ref, {}).setdefault( - column.name, [] - 
).append(item["name"]) - + for item in response.get("upstream_cols") or []: + table_ref = TableReference.create_from_lineage( + item, table.schema.catalog.metastore + ) + if table_ref: + table.upstreams.setdefault(table_ref, {}).setdefault( + column_name, [] + ).append(item["name"]) except Exception as e: - logger.error(f"Error getting lineage: {e}") + logger.warning( + f"Error getting column lineage on table {table.ref}, column {column_name}: {e}", + exc_info=True, + ) @staticmethod def _escape_sequence(value: str) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 808172a136bb3..fa61571fa92cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -18,6 +18,8 @@ class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): table_profiles: EntityFilterReport = EntityFilterReport.field(type="table profile") notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") + num_column_lineage_skipped_column_count: int = 0 + num_queries: int = 0 num_queries_dropped_parse_failure: int = 0 num_queries_missing_table: int = 0 # Can be due to pattern filter diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 4f7866aee7681..27c1f341aa84d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -1,6 +1,7 @@ import logging import re import time +from concurrent.futures import ThreadPoolExecutor from datetime import timedelta from typing import Dict, Iterable, List, Optional, Set, Union from urllib.parse import urljoin @@ -367,15 +368,7 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ownership = self._create_table_ownership_aspect(table) data_platform_instance = self._create_data_platform_instance_aspect() - if self.config.include_column_lineage: - self.unity_catalog_api_proxy.get_column_lineage( - table, include_entity_lineage=self.config.include_notebooks - ) - elif self.config.include_table_lineage: - self.unity_catalog_api_proxy.table_lineage( - table, include_entity_lineage=self.config.include_notebooks - ) - lineage = self._generate_lineage_aspect(dataset_urn, table) + lineage = self.ingest_lineage(table) if self.config.include_notebooks: for notebook_id in table.downstream_notebooks: @@ -401,6 +394,28 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn ) ] + def ingest_lineage(self, table: Table) -> Optional[UpstreamLineageClass]: + if self.config.include_table_lineage: + self.unity_catalog_api_proxy.table_lineage( + table, include_entity_lineage=self.config.include_notebooks + ) + + if self.config.include_column_lineage and table.upstreams: + if len(table.columns) > self.config.column_lineage_column_limit: + self.report.num_column_lineage_skipped_column_count += 1 + + with ThreadPoolExecutor( + max_workers=self.config.lineage_max_workers + ) as executor: + for column in table.columns[: self.config.column_lineage_column_limit]: + executor.submit( + self.unity_catalog_api_proxy.get_column_lineage, + table, + column.name, + ) + + return self._generate_lineage_aspect(self.gen_dataset_urn(table.ref), table) + def _generate_lineage_aspect( self, dataset_urn: str, table: Table ) -> Optional[UpstreamLineageClass]: From 
8e7f286e71b36a07b4fedc0de1807354064a4fa5 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 6 Oct 2023 20:12:39 +0530 Subject: [PATCH 101/156] feat(ingest/snowflake): support profiling with sampling (#8902) Co-authored-by: Andrew Sikowitz --- .../ingestion/source/bigquery_v2/profiler.py | 127 ++++++---------- .../ingestion/source/ge_data_profiler.py | 32 +++-- .../ingestion/source/ge_profiling_config.py | 4 +- .../ingestion/source/redshift/profile.py | 93 ++---------- .../source/snowflake/snowflake_profiler.py | 135 +++++------------- .../source/sql/sql_generic_profiler.py | 105 +++++++++++++- 6 files changed, 209 insertions(+), 287 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index b3e88459917b3..8ae17600e0eea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -1,12 +1,9 @@ -import dataclasses import logging from datetime import datetime from typing import Dict, Iterable, List, Optional, Tuple, cast from dateutil.relativedelta import relativedelta -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config @@ -15,7 +12,7 @@ RANGE_PARTITION_NAME, BigqueryTable, ) -from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest +from datahub.ingestion.source.sql.sql_generic import BaseTable from datahub.ingestion.source.sql.sql_generic_profiler import ( GenericProfiler, TableProfilerRequest, @@ -25,12 +22,6 @@ logger = logging.getLogger(__name__) -@dataclasses.dataclass -class BigqueryProfilerRequest(GEProfilerRequest): - table: BigqueryTable - profile_table_level_only: bool = False - - class BigqueryProfiler(GenericProfiler): config: BigQueryV2Config report: BigQueryV2Report @@ -183,84 +174,54 @@ def get_workunits( ) # Emit the profile work unit - profile_request = self.get_bigquery_profile_request( - project=project_id, dataset=dataset, table=table - ) + profile_request = self.get_profile_request(table, dataset, project_id) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: return - yield from self.generate_wu_from_profile_requests(profile_requests) - - def generate_wu_from_profile_requests( - self, profile_requests: List[BigqueryProfilerRequest] - ) -> Iterable[MetadataWorkUnit]: - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - for request, profile in self.generate_profiles( - table_profile_requests, + yield from self.generate_profile_workunits( + profile_requests, self.config.profiling.max_workers, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if request is None or profile is None: - continue - - request = cast(BigqueryProfilerRequest, request) - profile.sizeInBytes = request.table.size_in_bytes - # If table is partitioned we profile only one partition (if nothing set then the last one) - # but for table level we can use the rows_count from the table metadata - # This way even though column statistics only 
reflects one partition data but the rows count - # shows the proper count. - if profile.partitionSpec and profile.partitionSpec.partition: - profile.rowCount = request.table.rows_count - - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler and not request.profile_table_level_only: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() + ) - def get_bigquery_profile_request( - self, project: str, dataset: str, table: BigqueryTable - ) -> Optional[BigqueryProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = BigqueryTableIdentifier( - project_id=project, dataset=dataset, table=table.name + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return BigqueryTableIdentifier( + project_id=db_name, dataset=schema_name, table=table_name ).get_table_name() - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count - ): - profile_table_level_only = True - self.report.num_tables_not_eligible_profiling[f"{project}.{dataset}"] += 1 - if not table.column_count: - skip_profiling = True + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + return dict( + schema=db_name, # + table=f"{schema_name}.{table.name}", # . + ) - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") + def get_profile_request( + self, table: BaseTable, schema_name: str, db_name: str + ) -> Optional[TableProfilerRequest]: + profile_request = super().get_profile_request(table, schema_name, db_name) + + if not profile_request: return None + # Below code handles profiling changes required for partitioned or sharded tables + # 1. Skip profile if partition profiling is disabled. + # 2. 
Else update `profile_request.batch_kwargs` with partition and custom_sql + + bq_table = cast(BigqueryTable, table) (partition, custom_sql) = self.generate_partition_profiler_query( - project, dataset, table, self.config.profiling.partition_datetime + db_name, schema_name, bq_table, self.config.profiling.partition_datetime ) - if partition is None and table.partition_info: + + if partition is None and bq_table.partition_info: self.report.report_warning( "profile skipped as partitioned table is empty or partition id or type was invalid", - dataset_name, + profile_request.pretty_name, ) return None if ( @@ -268,24 +229,20 @@ def get_bigquery_profile_request( and not self.config.profiling.partition_profiling_enabled ): logger.debug( - f"{dataset_name} and partition {partition} is skipped because profiling.partition_profiling_enabled property is disabled" + f"{profile_request.pretty_name} and partition {partition} is skipped because profiling.partition_profiling_enabled property is disabled" ) self.report.profiling_skipped_partition_profiling_disabled.append( - dataset_name + profile_request.pretty_name ) return None - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = BigqueryProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict( - schema=project, - table=f"{dataset}.{table.name}", - custom_sql=custom_sql, - partition=partition, - ), - table=table, - profile_table_level_only=profile_table_level_only, - ) + if partition: + logger.debug("Updating profiling request for partitioned/sharded tables") + profile_request.batch_kwargs.update( + dict( + custom_sql=custom_sql, + partition=partition, + ) + ) + return profile_request diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 01e083d566168..9f6ac9dd21164 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -273,6 +273,7 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase): partition: Optional[str] config: GEProfilingConfig report: SQLSourceReport + custom_sql: Optional[str] query_combiner: SQLAlchemyQueryCombiner @@ -596,16 +597,8 @@ def generate_dataset_profile( # noqa: C901 (complexity) "catch_exceptions", self.config.catch_exceptions ) - profile = DatasetProfileClass(timestampMillis=get_sys_time()) - if self.partition: - profile.partitionSpec = PartitionSpecClass(partition=self.partition) - elif self.config.limit and self.config.offset: - profile.partitionSpec = PartitionSpecClass( - type=PartitionTypeClass.QUERY, - partition=json.dumps( - dict(limit=self.config.limit, offset=self.config.offset) - ), - ) + profile = self.init_profile() + profile.fieldProfiles = [] self._get_dataset_rows(profile) @@ -740,6 +733,24 @@ def generate_dataset_profile( # noqa: C901 (complexity) self.query_combiner.flush() return profile + def init_profile(self): + profile = DatasetProfileClass(timestampMillis=get_sys_time()) + if self.partition: + profile.partitionSpec = PartitionSpecClass(partition=self.partition) + elif self.config.limit: + profile.partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, + partition=json.dumps( + dict(limit=self.config.limit, offset=self.config.offset) + ), + ) + elif self.custom_sql: + profile.partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, partition="SAMPLE" + ) + + return profile + def 
update_dataset_batch_use_sampling(self, profile: DatasetProfileClass) -> None: if ( self.dataset.engine.dialect.name.lower() == BIGQUERY @@ -1064,6 +1075,7 @@ def _generate_single_profile( partition, self.config, self.report, + custom_sql, query_combiner, ).generate_dataset_profile() diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 77761c529ba0b..24a3e520d8caf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -157,12 +157,12 @@ class GEProfilingConfig(ConfigModel): ) use_sampling: bool = Field( default=True, - description="Whether to profile column level stats on sample of table. Only BigQuery supports this. " + description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. " "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ", ) sample_size: int = Field( - default=1000, + default=10000, description="Number of rows to be sampled from table for column level profiling." "Applicable only if `use_sampling` is set to True.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py index e983734082b1d..771636e8498a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/profile.py @@ -1,33 +1,19 @@ -import dataclasses import logging -from datetime import datetime -from typing import Dict, Iterable, List, Optional, Union, cast +from typing import Dict, Iterable, List, Optional, Union -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.redshift_schema import ( RedshiftTable, RedshiftView, ) from datahub.ingestion.source.redshift.report import RedshiftReport -from datahub.ingestion.source.sql.sql_generic_profiler import ( - GenericProfiler, - TableProfilerRequest, -) +from datahub.ingestion.source.sql.sql_generic_profiler import GenericProfiler from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler logger = logging.getLogger(__name__) -@dataclasses.dataclass -class RedshiftProfilerRequest(GEProfilerRequest): - table: Union[RedshiftTable, RedshiftView] - profile_table_level_only: bool = False - - class RedshiftProfiler(GenericProfiler): config: RedshiftConfig report: RedshiftReport @@ -63,80 +49,21 @@ def get_workunits( continue for table in tables[db].get(schema, {}): # Emit the profile work unit - profile_request = self.get_redshift_profile_request( - table, schema, db - ) + profile_request = self.get_profile_request(table, schema, db) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: continue - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - for request, profile in self.generate_profiles( - table_profile_requests, + + yield from self.generate_profile_workunits( + profile_requests, 
self.config.profiling.max_workers, db, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if profile is None: - continue - request = cast(RedshiftProfilerRequest, request) - - profile.sizeInBytes = request.table.size_in_bytes - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler and not request.profile_table_level_only: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() - - def get_redshift_profile_request( - self, - table: Union[RedshiftTable, RedshiftView], - schema_name: str, - db_name: str, - ) -> Optional[RedshiftProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = f"{db_name}.{schema_name}.{table.name}".lower() - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count - ): - # Profile only table level if dataset is filtered from profiling - # due to size limits alone - if self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, 0, 0 - ): - profile_table_level_only = True - else: - skip_profiling = True - - if len(table.columns) == 0: - skip_profiling = True - - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") - return None + ) - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = RedshiftProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict(schema=schema_name, table=table.name), - table=table, - profile_table_level_only=profile_table_level_only, - ) - return profile_request + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return f"{db_name}.{schema_name}.{table_name}".lower() diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 5f5e8e4bcdea3..24275dcdff34d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -1,20 +1,12 @@ -import dataclasses import logging -from datetime import datetime -from typing import Callable, Dict, Iterable, List, Optional, cast +from typing import Callable, Dict, Iterable, List, Optional from snowflake.sqlalchemy import snowdialect from sqlalchemy import create_engine, inspect from sqlalchemy.sql import sqltypes -from datahub.configuration.pattern_utils import is_schema_allowed -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.ge_data_profiler import ( - DatahubGEProfiler, - GEProfilerRequest, -) +from datahub.ingestion.source.ge_data_profiler import DatahubGEProfiler from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from 
datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report @@ -23,10 +15,8 @@ SnowflakeTable, ) from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCommonMixin -from datahub.ingestion.source.sql.sql_generic_profiler import ( - GenericProfiler, - TableProfilerRequest, -) +from datahub.ingestion.source.sql.sql_generic import BaseTable +from datahub.ingestion.source.sql.sql_generic_profiler import GenericProfiler from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler snowdialect.ischema_names["GEOGRAPHY"] = sqltypes.NullType @@ -35,12 +25,6 @@ logger = logging.getLogger(__name__) -@dataclasses.dataclass -class SnowflakeProfilerRequest(GEProfilerRequest): - table: SnowflakeTable - profile_table_level_only: bool = False - - class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin): def __init__( self, @@ -65,101 +49,52 @@ def get_workunits( profile_requests = [] for schema in database.schemas: - if not is_schema_allowed( - self.config.schema_pattern, - schema.name, - database.name, - self.config.match_fully_qualified_names, - ): - continue - for table in db_tables[schema.name]: - profile_request = self.get_snowflake_profile_request( + profile_request = self.get_profile_request( table, schema.name, database.name ) if profile_request is not None: + self.report.report_entity_profiled(profile_request.pretty_name) profile_requests.append(profile_request) if len(profile_requests) == 0: return - table_profile_requests = cast(List[TableProfilerRequest], profile_requests) - - for request, profile in self.generate_profiles( - table_profile_requests, + yield from self.generate_profile_workunits( + profile_requests, self.config.profiling.max_workers, database.name, platform=self.platform, profiler_args=self.get_profile_args(), - ): - if profile is None: - continue - profile.sizeInBytes = cast( - SnowflakeProfilerRequest, request - ).table.size_in_bytes - dataset_name = request.pretty_name - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - - # We don't add to the profiler state if we only do table level profiling as it always happens - if self.state_handler: - self.state_handler.add_to_state( - dataset_urn, int(datetime.now().timestamp() * 1000) - ) - - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=profile - ).as_workunit() + ) - def get_snowflake_profile_request( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - ) -> Optional[SnowflakeProfilerRequest]: - skip_profiling = False - profile_table_level_only = self.config.profiling.profile_table_level_only - dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) - if not self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, table.size_in_bytes, table.rows_count + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + return self.get_dataset_identifier(table_name, schema_name, db_name) + + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + custom_sql = None + if ( + not self.config.profiling.limit + and self.config.profiling.use_sampling + and table.rows_count + and table.rows_count > self.config.profiling.sample_size ): - # Profile only table level if dataset is filtered from profiling - # due to size limits alone - if self.is_dataset_eligible_for_profiling( - dataset_name, table.last_altered, 0, 0 - ): - profile_table_level_only = True 
- else: - skip_profiling = True - - if len(table.columns) == 0: - skip_profiling = True - - if skip_profiling: - if self.config.profiling.report_dropped_profiles: - self.report.report_dropped(f"profile of {dataset_name}") - return None - - self.report.report_entity_profiled(dataset_name) - logger.debug(f"Preparing profiling request for {dataset_name}") - profile_request = SnowflakeProfilerRequest( - pretty_name=dataset_name, - batch_kwargs=dict( - schema=schema_name, - table=table.name, - # Lowercase/Mixedcase table names in Snowflake do not work by default. - # We need to pass `use_quoted_name=True` for such tables as mentioned here - - # https://github.com/great-expectations/great_expectations/pull/2023 - use_quoted_name=(table.name != table.name.upper()), - ), - table=table, - profile_table_level_only=profile_table_level_only, - ) - return profile_request + # GX creates a temporary table from query if query is passed as batch kwargs. + # We are using fraction-based sampling here, instead of fixed-size sampling because + # Fixed-size sampling can be slower than equivalent fraction-based sampling + # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations + sample_pc = 100 * self.config.profiling.sample_size / table.rows_count + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + return { + **super().get_batch_kwargs(table, schema_name, db_name), + # Lowercase/Mixedcase table names in Snowflake do not work by default. + # We need to pass `use_quoted_name=True` for such tables as mentioned here - + # https://github.com/great-expectations/great_expectations/pull/2023 + "use_quoted_name": (table.name != table.name.upper()), + "custom_sql": custom_sql, + } def get_profiler_instance( self, db_name: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py index 344c114d464a9..aaeee5717a867 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -1,12 +1,15 @@ import logging +from abc import abstractmethod from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone -from typing import Dict, Iterable, List, Optional, Tuple, Union, cast +from typing import Dict, Iterable, List, Optional, Union, cast from sqlalchemy import create_engine, inspect from sqlalchemy.engine.reflection import Inspector from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.ge_data_profiler import ( DatahubGEProfiler, GEProfilerRequest, @@ -16,7 +19,7 @@ from datahub.ingestion.source.sql.sql_generic import BaseTable, BaseView from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile -from datahub.metadata.schema_classes import DatasetProfileClass +from datahub.metadata.com.linkedin.pegasus2avro.timeseries import PartitionType from datahub.utilities.stats_collections import TopKDict, int_top_k_dict @@ -63,14 +66,14 @@ def __init__( self.platform = platform self.state_handler = state_handler - def generate_profiles( + def generate_profile_workunits( self, requests: 
List[TableProfilerRequest], max_workers: int, db_name: Optional[str] = None, platform: Optional[str] = None, profiler_args: Optional[Dict] = None, - ) -> Iterable[Tuple[GEProfilerRequest, Optional[DatasetProfileClass]]]: + ) -> Iterable[MetadataWorkUnit]: ge_profile_requests: List[GEProfilerRequest] = [ cast(GEProfilerRequest, request) for request in requests @@ -80,21 +83,109 @@ def generate_profiles( request for request in requests if request.profile_table_level_only ] for request in table_level_profile_requests: - profile = DatasetProfile( + table_level_profile = DatasetProfile( timestampMillis=int(datetime.now().timestamp() * 1000), columnCount=request.table.column_count, rowCount=request.table.rows_count, sizeInBytes=request.table.size_in_bytes, ) - yield (request, profile) + dataset_urn = self.dataset_urn_builder(request.pretty_name) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=table_level_profile + ).as_workunit() if not ge_profile_requests: return # Otherwise, if column level profiling is enabled, use GE profiler. ge_profiler = self.get_profiler_instance(db_name) - yield from ge_profiler.generate_profiles( + + for ge_profiler_request, profile in ge_profiler.generate_profiles( ge_profile_requests, max_workers, platform, profiler_args + ): + if profile is None: + continue + + request = cast(TableProfilerRequest, ge_profiler_request) + profile.sizeInBytes = request.table.size_in_bytes + + # If table is partitioned we profile only one partition (if nothing set then the last one) + # but for table level we can use the rows_count from the table metadata + # This way even though column statistics only reflects one partition data but the rows count + # shows the proper count. + if ( + profile.partitionSpec + and profile.partitionSpec.type != PartitionType.FULL_TABLE + ): + profile.rowCount = request.table.rows_count + + dataset_urn = self.dataset_urn_builder(request.pretty_name) + + # We don't add to the profiler state if we only do table level profiling as it always happens + if self.state_handler: + self.state_handler.add_to_state( + dataset_urn, int(datetime.now().timestamp() * 1000) + ) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=profile + ).as_workunit() + + def dataset_urn_builder(self, dataset_name: str) -> str: + return make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + + @abstractmethod + def get_dataset_name(self, table_name: str, schema_name: str, db_name: str) -> str: + pass + + def get_profile_request( + self, table: BaseTable, schema_name: str, db_name: str + ) -> Optional[TableProfilerRequest]: + skip_profiling = False + profile_table_level_only = self.config.profiling.profile_table_level_only + dataset_name = self.get_dataset_name(table.name, schema_name, db_name) + if not self.is_dataset_eligible_for_profiling( + dataset_name, table.last_altered, table.size_in_bytes, table.rows_count + ): + # Profile only table level if dataset is filtered from profiling + # due to size limits alone + if self.is_dataset_eligible_for_profiling( + dataset_name, table.last_altered, 0, 0 + ): + profile_table_level_only = True + else: + skip_profiling = True + self.report.num_tables_not_eligible_profiling[ + f"{db_name}.{schema_name}" + ] += 1 + + if table.column_count == 0: + skip_profiling = True + + if skip_profiling: + if self.config.profiling.report_dropped_profiles: + self.report.report_dropped(f"profile of {dataset_name}") + return None + + 
logger.debug(f"Preparing profiling request for {dataset_name}") + profile_request = TableProfilerRequest( + pretty_name=dataset_name, + batch_kwargs=self.get_batch_kwargs(table, schema_name, db_name), + table=table, + profile_table_level_only=profile_table_level_only, + ) + return profile_request + + def get_batch_kwargs( + self, table: BaseTable, schema_name: str, db_name: str + ) -> dict: + return dict( + schema=schema_name, + table=table.name, ) def get_inspectors(self) -> Iterable[Inspector]: From c0feceb76fbf607e2883b7f2960eaf6c757629e4 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:10:24 -0400 Subject: [PATCH 102/156] test(): Manage Access Tokens Cypress test (#8936) --- .../src/app/settings/AccessTokenModal.tsx | 4 +- .../src/app/settings/AccessTokens.tsx | 7 ++- .../src/app/settings/CreateTokenModal.tsx | 18 +++++--- .../e2e/settings/manage_access_tokens.js | 43 +++++++++++++++++++ .../tests/cypress/cypress/support/commands.js | 6 +++ 5 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js diff --git a/datahub-web-react/src/app/settings/AccessTokenModal.tsx b/datahub-web-react/src/app/settings/AccessTokenModal.tsx index 0303db656c2a8..10427210d0692 100644 --- a/datahub-web-react/src/app/settings/AccessTokenModal.tsx +++ b/datahub-web-react/src/app/settings/AccessTokenModal.tsx @@ -60,7 +60,7 @@ export const AccessTokenModal = ({ visible, onClose, accessToken, expiresInText onCancel={onClose} footer={ <> - @@ -81,7 +81,7 @@ export const AccessTokenModal = ({ visible, onClose, accessToken, expiresInText Token{expiresInText} -
{accessToken}
+
{accessToken}
diff --git a/datahub-web-react/src/app/settings/AccessTokens.tsx b/datahub-web-react/src/app/settings/AccessTokens.tsx index 02ff3f1cd304c..c7a015de392da 100644 --- a/datahub-web-react/src/app/settings/AccessTokens.tsx +++ b/datahub-web-react/src/app/settings/AccessTokens.tsx @@ -199,7 +199,12 @@ export const AccessTokens = () => { key: 'x', render: (_, record: any) => ( - diff --git a/datahub-web-react/src/app/settings/CreateTokenModal.tsx b/datahub-web-react/src/app/settings/CreateTokenModal.tsx index 6038a86e23303..3cc446651efcb 100644 --- a/datahub-web-react/src/app/settings/CreateTokenModal.tsx +++ b/datahub-web-react/src/app/settings/CreateTokenModal.tsx @@ -117,10 +117,15 @@ export default function CreateTokenModal({ currentUserUrn, visible, onClose, onC onCancel={onModalClose} footer={ <> - - @@ -148,18 +153,21 @@ export default function CreateTokenModal({ currentUserUrn, visible, onClose, onC ]} hasFeedback > - + Description}> An optional description for your new token. - + Expires in - + {ACCESS_TOKEN_DURATIONS.map((duration) => ( diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js new file mode 100644 index 0000000000000..7a77c2b77df5b --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js @@ -0,0 +1,43 @@ +import { aliasQuery, hasOperationName } from "../utils"; +const test_id = Math.floor(Math.random() * 100000); + +describe("manage access tokens", () => { + before(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + const setTokenAuthEnabledFlag = (isOn) => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + res.body.data.appConfig.authConfig.tokenAuthEnabled = isOn; + }); + } + }); + }; + + it("create and revoke access token", () => { + //create access token, verify token on ui + setTokenAuthEnabledFlag(true); + cy.loginWithCredentials(); + cy.goToAccessTokenSettings(); + cy.clickOptionWithTestId("add-token-button"); + cy.enterTextInTestId("create-access-token-name", "Token Name" + test_id); + cy.enterTextInTestId("create-access-token-description", "Token Description" + test_id); + cy.clickOptionWithTestId("create-access-token-button"); + cy.waitTextVisible("New Personal Access Token"); + cy.get('[data-testid="access-token-value"]').should("be.visible"); + cy.get('[data-testid="access-token-value"]').invoke('text').should('match', /^[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+$/); + cy.clickOptionWithTestId("access-token-modal-close-button"); + //revoke access token, verify token removed from ui + cy.waitTextVisible("Token Name" + test_id); + cy.waitTextVisible("Token Description" + test_id); + cy.clickOptionWithTestId("revoke-token-button"); + cy.waitTextVisible("Are you sure you want to revoke this token?"); + cy.clickOptionWithText("Yes"); + cy.ensureTextNotPresent("Token Name" + test_id); + cy.ensureTextNotPresent("Token Description" + test_id); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index 8bfe7305c001f..64bc1253fc383 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -84,6 +84,12 @@ Cypress.Commands.add("goToOwnershipTypesSettings", () => { cy.waitTextVisible("Manage Ownership"); }); 
+Cypress.Commands.add("goToAccessTokenSettings", () => { + cy.visit("/settings/tokens"); + cy.waitTextVisible("Manage Access Tokens"); + cy.wait(3000); +}); + Cypress.Commands.add("goToIngestionPage", () => { cy.visit("/ingestion"); cy.waitTextVisible("Manage Ingestion"); From b191abbc5bb32a0a3c895facdff14d146da9fb74 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:11:57 -0400 Subject: [PATCH 103/156] test(): Nested domains cypress test (#8879) --- .../src/app/domain/CreateDomainModal.tsx | 5 +- .../nestedDomains/ManageDomainsPageV2.tsx | 7 ++- .../domainNavigator/DomainNode.tsx | 2 +- .../shared/EntityDropdown/EntityDropdown.tsx | 4 +- .../shared/EntityDropdown/MoveDomainModal.tsx | 5 +- .../cypress/e2e/domains/nested_domains.js | 53 +++++++++++++++++++ 6 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js diff --git a/datahub-web-react/src/app/domain/CreateDomainModal.tsx b/datahub-web-react/src/app/domain/CreateDomainModal.tsx index ca1bc30596003..606444d34bdc9 100644 --- a/datahub-web-react/src/app/domain/CreateDomainModal.tsx +++ b/datahub-web-react/src/app/domain/CreateDomainModal.tsx @@ -191,7 +191,10 @@ export default function CreateDomainModal({ onClose, onCreate }: Props) { rules={[{ whitespace: true }, { min: 1, max: 500 }]} hasFeedback > - + diff --git a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx index 0e5c035df00c1..b69f0c5458b5d 100644 --- a/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx +++ b/datahub-web-react/src/app/domain/nestedDomains/ManageDomainsPageV2.tsx @@ -42,7 +42,12 @@ export default function ManageDomainsPageV2() {
-
diff --git a/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx b/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx index 09c8e13853bb7..bf70bd043fd4a 100644 --- a/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx +++ b/datahub-web-react/src/app/domain/nestedDomains/domainNavigator/DomainNode.tsx @@ -103,7 +103,7 @@ export default function DomainNode({ domain, numDomainChildren, domainUrnToHide, return ( <> - + {hasDomainChildren && ( diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx index be975249b2670..bfb7ff7e540c4 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/EntityDropdown.tsx @@ -203,7 +203,7 @@ function EntityDropdown(props: Props) { disabled={isMoveDisabled(entityType, entityData, me.platformPrivileges)} onClick={() => setIsMoveModalVisible(true)} > - +  Move @@ -223,7 +223,7 @@ function EntityDropdown(props: Props) { : undefined } > - +  Delete diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx index cdbf6fdabf3c9..3826f934c1c25 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveDomainModal.tsx @@ -67,6 +67,7 @@ function MoveDomainModal(props: Props) { return ( Cancel - + } > diff --git a/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js new file mode 100644 index 0000000000000..a2d4de0f51659 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js @@ -0,0 +1,53 @@ +const domainName = "CypressNestedDomain"; +const domainDescription = "CypressNestedDomainDescription"; + +describe("nested domains test", () => { + + it("create a domain, move under parent, remove domain", () => { + // Create a new domain without a parent + cy.loginWithCredentials(); + cy.goToDomainList(); + cy.clickOptionWithTestId("domains-new-domain-button"); + cy.get('[data-testid="create-domain-name"]').click().type(domainName); + cy.get('[data-testid="create-domain-description"]').click().type(domainDescription); + cy.clickOptionWithTestId("create-domain-button"); + cy.waitTextVisible(domainName); + + // Ensure the new domain has no parent in the navigation sidebar + cy.waitTextVisible(domainDescription); + + // Move a domain from the root level to be under a parent domain + cy.clickOptionWithText(domainName); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-move-button"); + cy.get('[data-testid="move-domain-modal"]').contains("Marketing").click({force: true}); + cy.get('[data-testid="move-domain-modal"]').contains("Marketing").should("be.visible"); + cy.clickOptionWithTestId("move-domain-modal-move-button").wait(5000); + + // Wnsure domain is no longer on the sidebar navigator at the top level but shows up under the parent + cy.goToDomainList(); + cy.ensureTextNotPresent(domainName); + cy.ensureTextNotPresent(domainDescription); + cy.waitTextVisible("1 sub-domain"); + + // Move a domain from under a parent domain to the root level + cy.get('[data-testid="domain-list-item"]').contains("Marketing").prev().click(); + cy.clickOptionWithText(domainName); + cy.openThreeDotDropdown(); + 
cy.clickOptionWithTestId("entity-menu-move-button"); + cy.clickOptionWithTestId("move-domain-modal-move-button").wait(5000); + cy.goToDomainList(); + cy.waitTextVisible(domainName); + cy.waitTextVisible(domainDescription); + + // Delete a domain + cy.clickOptionWithText(domainName).wait(3000); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-delete-button"); + cy.waitTextVisible("Are you sure you want to remove this Domain?"); + cy.clickOptionWithText("Yes"); + cy.waitTextVisible("Deleted Domain!"); + cy.ensureTextNotPresent(domainName); + cy.ensureTextNotPresent(domainDescription); + }); +}); \ No newline at end of file From 93958302d529a65021c78f880347930297854692 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Sun, 8 Oct 2023 13:26:48 -0400 Subject: [PATCH 104/156] feat(models/assertion): Add SQL Assertions (#8969) --- .../com/linkedin/assertion/AssertionInfo.pdl | 17 ++++- .../linkedin/assertion/SqlAssertionInfo.pdl | 67 +++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index ae2a58028057b..e161270145a88 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -32,6 +32,11 @@ record AssertionInfo includes CustomProperties, ExternalReference { */ VOLUME + /** + * A raw SQL-statement based assertion + */ + SQL + /** * A schema or structural assertion. * @@ -56,7 +61,12 @@ record AssertionInfo includes CustomProperties, ExternalReference { volumeAssertion: optional VolumeAssertionInfo /** - * An schema Assertion definition. This field is populated when the type is DATASET_SCHEMA + * A SQL Assertion definition. This field is populated when the type is SQL. + */ + sqlAssertion: optional SqlAssertionInfo + + /** + * An schema Assertion definition. This field is populated when the type is DATA_SCHEMA */ schemaAssertion: optional SchemaAssertionInfo @@ -67,4 +77,9 @@ record AssertionInfo includes CustomProperties, ExternalReference { * the platform where it was ingested from. */ source: optional AssertionSource + + /** + * An optional human-readable description of the assertion + */ + description: optional string } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl new file mode 100644 index 0000000000000..f6ce738252f35 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SqlAssertionInfo.pdl @@ -0,0 +1,67 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a SQL Assertion +*/ +record SqlAssertionInfo { + /** + * The type of the SQL assertion being monitored. + */ + @Searchable = {} + type: enum SqlAssertionType { + /** + * A SQL Metric Assertion, e.g. one based on a numeric value returned by an arbitrary SQL query. + */ + METRIC + /** + * A SQL assertion that is evaluated against the CHANGE in a metric assertion + * over time. + */ + METRIC_CHANGE + } + + /** + * The entity targeted by this SQL check. 
+ */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * The SQL statement to be executed when evaluating the assertion (or computing the metric). + * This should be a valid and complete statement, executable by itself. + * + * Usually this should be a SELECT query statement. + */ + statement: string + + /** + * The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + * This value is required if the type is METRIC_CHANGE. + */ + changeType: optional AssertionValueChangeType + + /** + * The operator you'd like to apply to the result of the SQL query. + * + * Note that at this time, only numeric operators are valid inputs: + * GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + * BETWEEN. + */ + operator: AssertionStdOperator + + /** + * The parameters you'd like to provide as input to the operator. + * + * Note that only numeric parameter types are valid inputs: NUMBER. + */ + parameters: AssertionStdParameters +} \ No newline at end of file From 8d175ef7ef1ae8ffada7b2df2fb711ac02a6785d Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 10 Oct 2023 02:04:25 +0530 Subject: [PATCH 105/156] feat(ingest): incremental lineage source helper (#8941) Co-authored-by: Harshal Sheth --- .../datahub/ingestion/api/source_helpers.py | 138 +++++++++- .../ingestion/source/bigquery_v2/bigquery.py | 3 +- .../source/snowflake/snowflake_v2.py | 9 + .../snowflake_privatelink_golden.json | 243 +++++++++++------ .../integration/snowflake/test_snowflake.py | 2 + .../snowflake/test_snowflake_failures.py | 6 +- .../snowflake/test_snowflake_stateful.py | 3 +- ...l_less_upstreams_in_gms_aspect_golden.json | 106 ++++++++ ...l_more_upstreams_in_gms_aspect_golden.json | 120 +++++++++ .../incremental_table_lineage_golden.json | 41 +++ .../test_incremental_lineage_helper.py | 244 ++++++++++++++++++ .../source_helpers}/test_source_helpers.py | 0 12 files changed, 829 insertions(+), 86 deletions(-) create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json create mode 100644 metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py rename metadata-ingestion/tests/unit/{ => api/source_helpers}/test_source_helpers.py (100%) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7fc15cf829678..42f970e97c95f 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -1,3 +1,4 @@ +import copy import logging from datetime import datetime, timezone from typing import ( @@ -15,9 +16,14 @@ ) from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mce_builder import make_dataplatform_instance_urn +from datahub.emitter.mce_builder import ( + datahub_guid, + make_dataplatform_instance_urn, + set_aspect, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import 
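For reference, a minimal sketch of how the SqlAssertionInfo record above could be populated from Python. It assumes the usual codegen exposes the new record as SqlAssertionInfoClass alongside the existing assertion classes; the URNs, SQL statement, and threshold below are purely illustrative.

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    AssertionInfoClass,
    AssertionStdParameterClass,
    AssertionStdParametersClass,
    SqlAssertionInfoClass,
)

# A METRIC-type SQL assertion: run the statement, then compare the single numeric
# result against the operator and parameters.
sql_assertion = SqlAssertionInfoClass(
    type="METRIC",  # SqlAssertionType.METRIC
    entity="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.orders,PROD)",  # illustrative dataset URN
    statement="SELECT COUNT(*) FROM db.schema.orders WHERE status = 'FAILED'",  # illustrative query
    operator="LESS_THAN_OR_EQUAL_TO",  # one of the numeric AssertionStdOperator values
    parameters=AssertionStdParametersClass(
        value=AssertionStdParameterClass(value="10", type="NUMBER")
    ),
)

# Attach it to an assertion entity via the top-level AssertionInfo aspect (type SQL).
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:assertion:my-sql-assertion",  # illustrative assertion URN
    aspect=AssertionInfoClass(type="SQL", sqlAssertion=sql_assertion),
)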
DataHubGraph from datahub.metadata.schema_classes import ( BrowsePathEntryClass, BrowsePathsClass, @@ -25,12 +31,17 @@ ChangeTypeClass, ContainerClass, DatasetUsageStatisticsClass, + FineGrainedLineageClass, MetadataChangeEventClass, MetadataChangeProposalClass, StatusClass, + SystemMetadataClass, TagKeyClass, TimeWindowSizeClass, + UpstreamClass, + UpstreamLineageClass, ) +from datahub.specific.dataset import DatasetPatchBuilder from datahub.telemetry import telemetry from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn @@ -366,3 +377,128 @@ def _prepend_platform_instance( return [BrowsePathEntryClass(id=urn, urn=urn)] + entries return entries + + +def auto_incremental_lineage( + graph: Optional[DataHubGraph], + incremental_lineage: bool, + include_column_level_lineage: bool, + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + if not incremental_lineage: + yield from stream + return # early exit + + for wu in stream: + lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( + UpstreamLineageClass + ) + urn = wu.get_urn() + + if lineage_aspect: + if isinstance(wu.metadata, MetadataChangeEventClass): + set_aspect( + wu.metadata, None, UpstreamLineageClass + ) # we'll emit upstreamLineage separately below + if len(wu.metadata.proposedSnapshot.aspects) > 0: + yield wu + + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) if include_column_level_lineage else _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + else: + yield wu + + +def _convert_upstream_lineage_to_patch( + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + patch_builder = DatasetPatchBuilder(urn, system_metadata) + for upstream in aspect.upstreams: + patch_builder.add_upstream_lineage(upstream) + mcp = next(iter(patch_builder.build())) + return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) + + +def _lineage_wu_via_read_modify_write( + graph: Optional[DataHubGraph], + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
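Ahead of the read-modify-write helpers that follow, here is a small, self-contained sketch of the table-level merge rule implemented by _merge_upstream_lineage (defined just below): upstreams are keyed by dataset URN, an incoming edge only replaces the stored one when its auditStamp is newer, and edges that exist only in GMS are preserved. The URNs and timestamps are illustrative.

from datahub.ingestion.api.source_helpers import _merge_upstream_lineage
from datahub.metadata.schema_classes import (
    AuditStampClass,
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineageClass,
)

def upstream(dataset: str, ts: int) -> UpstreamClass:
    return UpstreamClass(
        dataset=dataset,
        type=DatasetLineageTypeClass.TRANSFORMED,
        auditStamp=AuditStampClass(time=ts, actor="urn:li:corpuser:unknown"),
    )

U1 = "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)"
U2 = "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)"

# Aspect already stored in GMS vs. the aspect freshly emitted by the source.
gms_aspect = UpstreamLineageClass(upstreams=[upstream(U1, 0)])
new_aspect = UpstreamLineageClass(upstreams=[upstream(U1, 100), upstream(U2, 100)])

merged = _merge_upstream_lineage(new_aspect, gms_aspect)
assert {u.dataset for u in merged.upstreams} == {U1, U2}  # union of both sides
assert all(u.auditStamp.time == 100 for u in merged.upstreams)  # newer edges win

The Snowflake change further below wires auto_incremental_lineage into get_workunit_processors through functools.partial, so this merging happens transparently as work units stream through the source.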
" + ) + gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) + if gms_aspect: + new_aspect = _merge_upstream_lineage(aspect, gms_aspect) + else: + new_aspect = aspect + + return MetadataChangeProposalWrapper( + entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata + ).as_workunit() + + +def _merge_upstream_lineage( + new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass +) -> UpstreamLineageClass: + merged_aspect = copy.deepcopy(gms_aspect) + + upstreams_map: Dict[str, UpstreamClass] = { + upstream.dataset: upstream for upstream in merged_aspect.upstreams + } + + upstreams_updated = False + fine_upstreams_updated = False + + for table_upstream in new_aspect.upstreams: + if table_upstream.dataset not in upstreams_map or ( + table_upstream.auditStamp.time + > upstreams_map[table_upstream.dataset].auditStamp.time + ): + upstreams_map[table_upstream.dataset] = table_upstream + upstreams_updated = True + + if upstreams_updated: + merged_aspect.upstreams = list(upstreams_map.values()) + + if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: + fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { + get_fine_grained_lineage_key(fine_upstream): fine_upstream + for fine_upstream in merged_aspect.fineGrainedLineages + } + for column_upstream in new_aspect.fineGrainedLineages: + column_upstream_key = get_fine_grained_lineage_key(column_upstream) + + if column_upstream_key not in fine_upstreams_map or ( + column_upstream.confidenceScore + > fine_upstreams_map[column_upstream_key].confidenceScore + ): + fine_upstreams_map[column_upstream_key] = column_upstream + fine_upstreams_updated = True + + if fine_upstreams_updated: + merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) + else: + merged_aspect.fineGrainedLineages = ( + new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages + ) + + return merged_aspect + + +def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: + return datahub_guid( + { + "upstreams": sorted(fine_upstream.upstreams or []), + "downstreams": sorted(fine_upstream.downstreams or []), + "transformOperation": fine_upstream.transformOperation, + } + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index fee181864a2d6..b4a04d96b532b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -461,7 +461,8 @@ def _init_schema_resolver(self) -> SchemaResolver: ) else: logger.warning( - "Failed to load schema info from DataHub as DataHubGraph is missing.", + "Failed to load schema info from DataHub as DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
", ) return SchemaResolver(platform=self.platform, env=self.config.env) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 215116b4c33fb..e0848b5f9ab34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -4,6 +4,7 @@ import os.path import platform from dataclasses import dataclass +from functools import partial from typing import Callable, Dict, Iterable, List, Optional, Union import pandas as pd @@ -35,6 +36,7 @@ TestableSource, TestConnectionReport, ) +from datahub.ingestion.api.source_helpers import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ClassificationHandler from datahub.ingestion.source.common.subtypes import ( @@ -511,6 +513,13 @@ def _init_schema_resolver(self) -> SchemaResolver: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + self.config.include_column_lineage + or self.config.include_view_column_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 7687b99ac8d6d..5057dacd5b0c8 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -24,7 +24,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -39,7 +40,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -54,7 +56,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -86,7 +90,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -115,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -130,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -145,7 +152,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -162,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -177,7 +186,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -212,7 +223,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -375,7 +387,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -401,7 +414,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +430,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -433,7 +448,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -457,7 +473,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +489,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -635,7 +653,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -661,7 +680,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -676,7 +696,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +714,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -717,7 +739,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -732,7 +755,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -895,7 +919,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -921,7 +946,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + 
"lastRunId": "no-run-id-provided" } }, { @@ -936,7 +962,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +980,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -977,7 +1005,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -992,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1155,7 +1185,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1212,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1196,7 +1228,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1213,7 +1246,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1237,7 +1271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1252,7 +1287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1415,7 +1451,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1441,7 +1478,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1456,7 +1494,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1473,7 +1512,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1497,7 +1537,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1512,7 +1553,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1675,7 +1717,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1701,7 
+1744,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1716,7 +1760,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1733,7 +1778,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1757,7 +1803,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1772,7 +1819,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +1983,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1961,7 +2010,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1976,7 +2026,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1993,7 +2044,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2017,7 +2069,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2032,7 +2085,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2195,7 +2249,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2221,7 +2276,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2236,7 +2292,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2253,7 +2310,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2277,7 +2335,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2292,7 +2351,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2455,7 +2515,8 @@ }, "systemMetadata": { "lastObserved": 
1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2481,7 +2542,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2558,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2513,7 +2576,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2537,7 +2601,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2552,7 +2617,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2715,7 +2781,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2741,7 +2808,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2756,7 +2824,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2773,7 +2842,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2797,7 +2867,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2821,7 +2892,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2845,7 +2917,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2869,7 +2942,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2893,7 +2967,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2917,7 +2992,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2941,7 +3017,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2965,7 +3042,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": 
"snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2989,7 +3067,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3013,7 +3092,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3037,7 +3117,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 2c77ace8b53e5..3dafe85ef950a 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -125,6 +125,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): validate_upstreams_against_patterns=False, include_operational_stats=True, email_as_user_identifier=True, + incremental_lineage=False, start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc ), @@ -213,6 +214,7 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_ include_views=False, include_view_lineage=False, include_usage_stats=False, + incremental_lineage=False, include_operational_stats=False, start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index bba53c1e97a47..cd53b8f7db4f6 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -283,10 +283,12 @@ def test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warni ) snowflake_pipeline_config1 = snowflake_pipeline_config.copy() - cast( + config = cast( SnowflakeV2Config, cast(PipelineConfig, snowflake_pipeline_config1).source.config, - ).include_view_lineage = True + ) + config.include_view_lineage = True + config.incremental_lineage = False pipeline = Pipeline(snowflake_pipeline_config1) pipeline.run() pipeline.raise_from_status() # pipeline should not fail diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py index f72bd5b72d2cd..7e2ac94fa4e35 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_stateful.py @@ -31,6 +31,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: match_fully_qualified_names=True, schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), include_tables=include_tables, + incremental_lineage=False, stateful_ingestion=StatefulStaleMetadataRemovalConfig.parse_obj( { "enabled": True, @@ -49,7 +50,7 @@ def stateful_pipeline_config(include_tables: bool) -> PipelineConfig: @freeze_time(FROZEN_TIME) -def test_tableau_stateful(mock_datahub_graph): +def test_stale_metadata_removal(mock_datahub_graph): with mock.patch( 
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..812566143014b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_less_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,106 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json new file mode 100644 index 0000000000000..17f4d10728268 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_cll_more_upstreams_in_gms_aspect_golden.json @@ -0,0 +1,120 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream3,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_a)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_b)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_b)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_b)" + ], + 
"confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_c)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD),col_c)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_c)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json new file mode 100644 index 0000000000000..c828373c73080 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/incremental_table_lineage_golden.json @@ -0,0 +1,41 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream1%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)", + "type": "TRANSFORMED" + } + }, + { + "op": "add", + "path": "/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aplatform%2Cupstream2%2CPROD%29", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:platform,upstream2,PROD)", + "type": "TRANSFORMED" + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py new file mode 100644 index 0000000000000..4078bda26c743 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -0,0 +1,244 @@ +from typing import List, Optional +from unittest.mock import MagicMock + +import pytest + +import datahub.metadata.schema_classes as models +from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.source_helpers import auto_incremental_lineage +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.sink.file import write_metadata_file +from tests.test_helpers import mce_helpers + +platform = "platform" +system_metadata = models.SystemMetadataClass(lastObserved=1643871600000, runId="run-id") + + +def make_lineage_aspect( + dataset_name: str, + upstreams: List[str], + timestamp: int = 0, + columns: List[str] = [], + include_cll: bool = False, +) -> models.UpstreamLineageClass: + """ + Generates dataset properties and upstream lineage aspects + with simple column to column lineage between current dataset and all upstreams + """ + + dataset_urn = make_dataset_urn(platform, dataset_name) + return models.UpstreamLineageClass( + upstreams=[ + models.UpstreamClass( + dataset=upstream_urn, + type=models.DatasetLineageTypeClass.TRANSFORMED, + 
auditStamp=models.AuditStampClass( + time=timestamp, actor="urn:li:corpuser:unknown" + ), + ) + for upstream_urn in upstreams + ], + fineGrainedLineages=[ + models.FineGrainedLineageClass( + upstreamType=models.FineGrainedLineageUpstreamTypeClass.FIELD_SET, + downstreamType=models.FineGrainedLineageDownstreamTypeClass.FIELD, + upstreams=[ + make_schema_field_urn(upstream_urn, col) + for upstream_urn in upstreams + ], + downstreams=[make_schema_field_urn(dataset_urn, col)], + ) + for col in columns + ] + if include_cll + else None, + ) + + +def base_table_lineage_aspect() -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + ) + + +def base_cll_aspect(timestamp: int = 0) -> models.UpstreamLineageClass: + return make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) for name in ["upstream1", "upstream2"] + ], + timestamp=timestamp, + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + +def test_incremental_table_lineage(tmp_path, pytestconfig): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_table_lineage.json" + golden_file = test_resources_dir / "incremental_table_lineage_golden.json" + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_table_lineage_aspect() + + processed_wus = auto_incremental_lineage( + graph=None, + incremental_lineage=True, + include_column_level_lineage=False, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +@pytest.mark.parametrize( + "gms_aspect,current_aspect,output_aspect", + [ + # emitting CLL upstreamLineage over table level upstreamLineage + [ + base_table_lineage_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting upstreamLineage for the first time + [ + None, + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage + [ + base_cll_aspect(), + base_cll_aspect(), + base_cll_aspect(), + ], + # emitting CLL upstreamLineage over same CLL upstreamLineage but with earlier timestamp + [ + base_cll_aspect(), # default timestamp is 0 + base_cll_aspect(timestamp=1643871600000), + base_cll_aspect(timestamp=1643871600000), + ], + ], +) +def test_incremental_column_level_lineage( + gms_aspect: Optional[models.UpstreamLineageClass], + current_aspect: models.UpstreamLineageClass, + output_aspect: models.UpstreamLineageClass, +) -> None: + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = gms_aspect + dataset_urn = make_dataset_urn(platform, "dataset1") + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=current_aspect, + systemMetadata=system_metadata, + ).as_workunit() + ], + ) + + wu: MetadataWorkUnit = next(iter(processed_wus)) + aspect = wu.get_aspect_of_type(models.UpstreamLineageClass) + assert aspect == output_aspect + + +def test_incremental_column_lineage_less_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path 
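The incremental_table_lineage_golden.json file above shows the shape produced by the patch branch: instead of overwriting upstreamLineage, _convert_upstream_lineage_to_patch emits one MCP with changeType PATCH whose aspect value is a JSON-patch document containing an "add" operation per upstream, keyed by the URL-encoded dataset URN, so GMS merges edges instead of replacing them. A hedged sketch of producing the same shape directly with DatasetPatchBuilder (URNs are illustrative):

from datahub.metadata.schema_classes import (
    AuditStampClass,
    DatasetLineageTypeClass,
    UpstreamClass,
)
from datahub.specific.dataset import DatasetPatchBuilder

builder = DatasetPatchBuilder("urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD)")
builder.add_upstream_lineage(
    UpstreamClass(
        dataset="urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD)",
        type=DatasetLineageTypeClass.TRANSFORMED,
        auditStamp=AuditStampClass(time=0, actor="urn:li:corpuser:unknown"),
    )
)

mcp = next(iter(builder.build()))
assert mcp.changeType == "PATCH"
assert mcp.aspectName == "upstreamLineage"
print(mcp.aspect.value)  # JSON-patch payload, e.g. [{"op": "add", "path": "/upstreams/...", ...}]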
/ "incremental_cll_less_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_less_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[make_dataset_urn(platform, name) for name in ["upstream1"]], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) + + +def test_incremental_column_lineage_more_upstreams_in_gms_aspect( + tmp_path, pytestconfig +): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "incremental_cll_more_upstreams_in_gms_aspect.json" + golden_file = ( + test_resources_dir / "incremental_cll_more_upstreams_in_gms_aspect_golden.json" + ) + + urn = make_dataset_urn(platform, "dataset1") + aspect = base_cll_aspect() + + mock_graph = MagicMock() + mock_graph.get_aspect.return_value = make_lineage_aspect( + "dataset1", + upstreams=[ + make_dataset_urn(platform, name) + for name in ["upstream1", "upstream2", "upstream3"] + ], + columns=["col_a", "col_b", "col_c"], + include_cll=True, + ) + + processed_wus = auto_incremental_lineage( + graph=mock_graph, + incremental_lineage=True, + include_column_level_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) diff --git a/metadata-ingestion/tests/unit/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py similarity index 100% rename from metadata-ingestion/tests/unit/test_source_helpers.py rename to metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py From 57f855ecd11632e884b12fda0fc57e2694ee26a5 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:18:21 +0530 Subject: [PATCH 106/156] feat(ingest): refactor + simplify incremental lineage helper (#8976) --- .../api/incremental_lineage_helper.py | 139 ++++++++++++++++++ .../datahub/ingestion/api/source_helpers.py | 138 +---------------- .../source/snowflake/snowflake_v2.py | 4 +- .../test_incremental_lineage_helper.py | 6 +- 4 files changed, 142 insertions(+), 145 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py new file mode 100644 index 0000000000000..9478c5cf7efa2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -0,0 +1,139 @@ +import copy +from typing import Dict, Iterable, Optional + +from datahub.emitter.mce_builder import datahub_guid, set_aspect +from datahub.emitter.mcp 
import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.schema_classes import ( + FineGrainedLineageClass, + MetadataChangeEventClass, + SystemMetadataClass, + UpstreamClass, + UpstreamLineageClass, +) +from datahub.specific.dataset import DatasetPatchBuilder + + +def _convert_upstream_lineage_to_patch( + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + patch_builder = DatasetPatchBuilder(urn, system_metadata) + for upstream in aspect.upstreams: + patch_builder.add_upstream_lineage(upstream) + mcp = next(iter(patch_builder.build())) + return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) + + +def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: + return datahub_guid( + { + "upstreams": sorted(fine_upstream.upstreams or []), + "downstreams": sorted(fine_upstream.downstreams or []), + "transformOperation": fine_upstream.transformOperation, + } + ) + + +def _merge_upstream_lineage( + new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass +) -> UpstreamLineageClass: + merged_aspect = copy.deepcopy(gms_aspect) + + upstreams_map: Dict[str, UpstreamClass] = { + upstream.dataset: upstream for upstream in merged_aspect.upstreams + } + + upstreams_updated = False + fine_upstreams_updated = False + + for table_upstream in new_aspect.upstreams: + if table_upstream.dataset not in upstreams_map or ( + table_upstream.auditStamp.time + > upstreams_map[table_upstream.dataset].auditStamp.time + ): + upstreams_map[table_upstream.dataset] = table_upstream + upstreams_updated = True + + if upstreams_updated: + merged_aspect.upstreams = list(upstreams_map.values()) + + if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: + fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { + get_fine_grained_lineage_key(fine_upstream): fine_upstream + for fine_upstream in merged_aspect.fineGrainedLineages + } + for column_upstream in new_aspect.fineGrainedLineages: + column_upstream_key = get_fine_grained_lineage_key(column_upstream) + + if column_upstream_key not in fine_upstreams_map or ( + column_upstream.confidenceScore + > fine_upstreams_map[column_upstream_key].confidenceScore + ): + fine_upstreams_map[column_upstream_key] = column_upstream + fine_upstreams_updated = True + + if fine_upstreams_updated: + merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) + else: + merged_aspect.fineGrainedLineages = ( + new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages + ) + + return merged_aspect + + +def _lineage_wu_via_read_modify_write( + graph: Optional[DataHubGraph], + urn: str, + aspect: UpstreamLineageClass, + system_metadata: Optional[SystemMetadataClass], +) -> MetadataWorkUnit: + if graph is None: + raise ValueError( + "Failed to handle incremental lineage, DataHubGraph is missing. " + "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. 
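A short sketch of why get_fine_grained_lineage_key above hashes only the upstreams, downstreams, and transformOperation: two fine-grained entries that describe the same column edge but carry different confidence collapse to the same key, and _merge_upstream_lineage only lets the incoming entry replace the stored one when its confidenceScore is strictly higher. The schema-field URNs are illustrative.

from datahub.ingestion.api.incremental_lineage_helper import get_fine_grained_lineage_key
from datahub.metadata.schema_classes import (
    FineGrainedLineageClass,
    FineGrainedLineageDownstreamTypeClass,
    FineGrainedLineageUpstreamTypeClass,
)

def column_lineage(confidence: float) -> FineGrainedLineageClass:
    return FineGrainedLineageClass(
        upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
        upstreams=[
            "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,upstream1,PROD),col_a)"
        ],
        downstreams=[
            "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:platform,dataset1,PROD),col_a)"
        ],
        confidenceScore=confidence,
    )

low, high = column_lineage(0.4), column_lineage(0.9)
# confidenceScore is deliberately excluded from the key, so both map to the same entry.
assert get_fine_grained_lineage_key(low) == get_fine_grained_lineage_key(high)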
" + ) + gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) + if gms_aspect: + new_aspect = _merge_upstream_lineage(aspect, gms_aspect) + else: + new_aspect = aspect + + return MetadataChangeProposalWrapper( + entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata + ).as_workunit() + + +def auto_incremental_lineage( + graph: Optional[DataHubGraph], + incremental_lineage: bool, + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + if not incremental_lineage: + yield from stream + return # early exit + + for wu in stream: + lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( + UpstreamLineageClass + ) + urn = wu.get_urn() + + if lineage_aspect: + if isinstance(wu.metadata, MetadataChangeEventClass): + set_aspect( + wu.metadata, None, UpstreamLineageClass + ) # we'll emit upstreamLineage separately below + if len(wu.metadata.proposedSnapshot.aspects) > 0: + yield wu + + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) if lineage_aspect.fineGrainedLineages else _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + else: + yield wu diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 42f970e97c95f..7fc15cf829678 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -1,4 +1,3 @@ -import copy import logging from datetime import datetime, timezone from typing import ( @@ -16,14 +15,9 @@ ) from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mce_builder import ( - datahub_guid, - make_dataplatform_instance_urn, - set_aspect, -) +from datahub.emitter.mce_builder import make_dataplatform_instance_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( BrowsePathEntryClass, BrowsePathsClass, @@ -31,17 +25,12 @@ ChangeTypeClass, ContainerClass, DatasetUsageStatisticsClass, - FineGrainedLineageClass, MetadataChangeEventClass, MetadataChangeProposalClass, StatusClass, - SystemMetadataClass, TagKeyClass, TimeWindowSizeClass, - UpstreamClass, - UpstreamLineageClass, ) -from datahub.specific.dataset import DatasetPatchBuilder from datahub.telemetry import telemetry from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn @@ -377,128 +366,3 @@ def _prepend_platform_instance( return [BrowsePathEntryClass(id=urn, urn=urn)] + entries return entries - - -def auto_incremental_lineage( - graph: Optional[DataHubGraph], - incremental_lineage: bool, - include_column_level_lineage: bool, - stream: Iterable[MetadataWorkUnit], -) -> Iterable[MetadataWorkUnit]: - if not incremental_lineage: - yield from stream - return # early exit - - for wu in stream: - lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( - UpstreamLineageClass - ) - urn = wu.get_urn() - - if lineage_aspect: - if isinstance(wu.metadata, MetadataChangeEventClass): - set_aspect( - wu.metadata, None, UpstreamLineageClass - ) # we'll emit upstreamLineage separately below - if len(wu.metadata.proposedSnapshot.aspects) > 0: - yield wu - - yield _lineage_wu_via_read_modify_write( - graph, urn, lineage_aspect, wu.metadata.systemMetadata - ) if 
include_column_level_lineage else _convert_upstream_lineage_to_patch( - urn, lineage_aspect, wu.metadata.systemMetadata - ) - else: - yield wu - - -def _convert_upstream_lineage_to_patch( - urn: str, - aspect: UpstreamLineageClass, - system_metadata: Optional[SystemMetadataClass], -) -> MetadataWorkUnit: - patch_builder = DatasetPatchBuilder(urn, system_metadata) - for upstream in aspect.upstreams: - patch_builder.add_upstream_lineage(upstream) - mcp = next(iter(patch_builder.build())) - return MetadataWorkUnit(id=f"{urn}-upstreamLineage", mcp_raw=mcp) - - -def _lineage_wu_via_read_modify_write( - graph: Optional[DataHubGraph], - urn: str, - aspect: UpstreamLineageClass, - system_metadata: Optional[SystemMetadataClass], -) -> MetadataWorkUnit: - if graph is None: - raise ValueError( - "Failed to handle incremental lineage, DataHubGraph is missing. " - "Use `datahub-rest` sink OR provide `datahub-api` config in recipe. " - ) - gms_aspect = graph.get_aspect(urn, UpstreamLineageClass) - if gms_aspect: - new_aspect = _merge_upstream_lineage(aspect, gms_aspect) - else: - new_aspect = aspect - - return MetadataChangeProposalWrapper( - entityUrn=urn, aspect=new_aspect, systemMetadata=system_metadata - ).as_workunit() - - -def _merge_upstream_lineage( - new_aspect: UpstreamLineageClass, gms_aspect: UpstreamLineageClass -) -> UpstreamLineageClass: - merged_aspect = copy.deepcopy(gms_aspect) - - upstreams_map: Dict[str, UpstreamClass] = { - upstream.dataset: upstream for upstream in merged_aspect.upstreams - } - - upstreams_updated = False - fine_upstreams_updated = False - - for table_upstream in new_aspect.upstreams: - if table_upstream.dataset not in upstreams_map or ( - table_upstream.auditStamp.time - > upstreams_map[table_upstream.dataset].auditStamp.time - ): - upstreams_map[table_upstream.dataset] = table_upstream - upstreams_updated = True - - if upstreams_updated: - merged_aspect.upstreams = list(upstreams_map.values()) - - if new_aspect.fineGrainedLineages and merged_aspect.fineGrainedLineages: - fine_upstreams_map: Dict[str, FineGrainedLineageClass] = { - get_fine_grained_lineage_key(fine_upstream): fine_upstream - for fine_upstream in merged_aspect.fineGrainedLineages - } - for column_upstream in new_aspect.fineGrainedLineages: - column_upstream_key = get_fine_grained_lineage_key(column_upstream) - - if column_upstream_key not in fine_upstreams_map or ( - column_upstream.confidenceScore - > fine_upstreams_map[column_upstream_key].confidenceScore - ): - fine_upstreams_map[column_upstream_key] = column_upstream - fine_upstreams_updated = True - - if fine_upstreams_updated: - merged_aspect.fineGrainedLineages = list(fine_upstreams_map.values()) - else: - merged_aspect.fineGrainedLineages = ( - new_aspect.fineGrainedLineages or gms_aspect.fineGrainedLineages - ) - - return merged_aspect - - -def get_fine_grained_lineage_key(fine_upstream: FineGrainedLineageClass) -> str: - return datahub_guid( - { - "upstreams": sorted(fine_upstream.upstreams or []), - "downstreams": sorted(fine_upstream.downstreams or []), - "transformOperation": fine_upstream.transformOperation, - } - ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index e0848b5f9ab34..a5c07d9a3870c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -27,6 +27,7 @@ platform_name, support_status, ) +from 
datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, @@ -36,7 +37,6 @@ TestableSource, TestConnectionReport, ) -from datahub.ingestion.api.source_helpers import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ClassificationHandler from datahub.ingestion.source.common.subtypes import ( @@ -517,8 +517,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: auto_incremental_lineage, self.ctx.graph, self.config.incremental_lineage, - self.config.include_column_lineage - or self.config.include_view_column_lineage, ), StaleEntityRemovalHandler.create( self, self.config, self.ctx diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py index 4078bda26c743..54a22d860285c 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -6,7 +6,7 @@ import datahub.metadata.schema_classes as models from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.source_helpers import auto_incremental_lineage +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.sink.file import write_metadata_file from tests.test_helpers import mce_helpers @@ -88,7 +88,6 @@ def test_incremental_table_lineage(tmp_path, pytestconfig): processed_wus = auto_incremental_lineage( graph=None, incremental_lineage=True, - include_column_level_lineage=False, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata @@ -146,7 +145,6 @@ def test_incremental_column_level_lineage( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=dataset_urn, @@ -184,7 +182,6 @@ def test_incremental_column_lineage_less_upstreams_in_gms_aspect( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata @@ -227,7 +224,6 @@ def test_incremental_column_lineage_more_upstreams_in_gms_aspect( processed_wus = auto_incremental_lineage( graph=mock_graph, incremental_lineage=True, - include_column_level_lineage=True, stream=[ MetadataChangeProposalWrapper( entityUrn=urn, aspect=aspect, systemMetadata=system_metadata From bb39d5418fcbf8bebbae1b510c63a1170865a072 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 10 Oct 2023 16:08:34 +0530 Subject: [PATCH 107/156] fix(lint): run black, isort (#8978) --- .../tests/assertions/assertions_test.py | 33 ++-- smoke-test/tests/browse/browse_test.py | 51 +++++-- smoke-test/tests/cli/datahub-cli.py | 76 +++++++--- smoke-test/tests/cli/datahub_graph_test.py | 12 +- .../cli/delete_cmd/test_timeseries_delete.py | 12 +- .../ingest_cmd/test_timeseries_rollback.py | 6 +- .../cli/user_groups_cmd/test_group_cmd.py | 3 +- smoke-test/tests/conftest.py | 4 +- smoke-test/tests/consistency_utils.py | 16 
+- .../tests/containers/containers_test.py | 4 +- smoke-test/tests/cypress/integration_test.py | 23 ++- .../tests/dataproduct/test_dataproduct.py | 4 +- smoke-test/tests/delete/delete_test.py | 18 +-- .../tests/deprecation/deprecation_test.py | 9 +- smoke-test/tests/domains/domains_test.py | 15 +- .../managed_ingestion_test.py | 3 +- smoke-test/tests/patch/common_patch_tests.py | 52 ++----- .../tests/patch/test_datajob_patches.py | 23 +-- .../tests/patch/test_dataset_patches.py | 18 ++- smoke-test/tests/policies/test_policies.py | 10 +- .../tests/setup/lineage/helper_classes.py | 5 +- .../setup/lineage/ingest_data_job_change.py | 42 ++---- .../lineage/ingest_dataset_join_change.py | 36 ++--- .../lineage/ingest_input_datasets_change.py | 42 ++---- .../setup/lineage/ingest_time_lineage.py | 18 ++- smoke-test/tests/setup/lineage/utils.py | 85 +++++------ .../tags-and-terms/tags_and_terms_test.py | 4 +- smoke-test/tests/telemetry/telemetry_test.py | 4 +- smoke-test/tests/test_result_msg.py | 23 ++- smoke-test/tests/test_stateful_ingestion.py | 14 +- smoke-test/tests/tests/tests_test.py | 7 +- smoke-test/tests/timeline/timeline_test.py | 67 +++++---- .../tokens/revokable_access_token_test.py | 12 +- smoke-test/tests/utils.py | 17 +-- smoke-test/tests/views/views_test.py | 142 +++++++++--------- 35 files changed, 457 insertions(+), 453 deletions(-) diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 4aa64c512f684..48f3564e6cd97 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -2,28 +2,29 @@ import urllib import pytest -import requests_wrapper as requests import tenacity from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation -from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdOperatorClass, - AssertionTypeClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, - PartitionTypeClass, -) -from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info +from datahub.metadata.com.linkedin.pegasus2avro.assertion import \ + AssertionStdAggregation +from datahub.metadata.schema_classes import (AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdOperatorClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + PartitionSpecClass, + PartitionTypeClass) + +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest, wait_for_healthcheck_util) restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index b9d2143d13ec7..550f0062d5a39 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,9 +1,10 @@ import time import pytest -import requests_wrapper as 
requests -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +import requests_wrapper as requests +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -51,7 +52,9 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): # /prod -- There should be one entity get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod"], "start": 0, "count": 100 } }, + "variables": { + "input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100} + }, } response = frontend_session.post( @@ -67,12 +70,19 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] print(browse) - assert browse["entities"] == [{ "urn": TEST_DATASET_3_URN }] + assert browse["entities"] == [{"urn": TEST_DATASET_3_URN}] # /prod/kafka1 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka1"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka1"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -88,16 +98,27 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 3, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }, { "urn": TEST_DATASET_3_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka1"], "totalNumEntities": 0 } + "total": 3, + "entities": [ + {"urn": TEST_DATASET_1_URN}, + {"urn": TEST_DATASET_2_URN}, + {"urn": TEST_DATASET_3_URN}, + ], + "groups": [], + "metadata": {"path": ["prod", "kafka1"], "totalNumEntities": 0}, } # /prod/kafka2 get_browse_paths_json = { "query": get_browse_paths_query, - "variables": {"input": { "type": "DATASET", "path": ["prod", "kafka2"], "start": 0, "count": 10 } }, + "variables": { + "input": { + "type": "DATASET", + "path": ["prod", "kafka2"], + "start": 0, + "count": 10, + } + }, } response = frontend_session.post( @@ -113,10 +134,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): browse = res_data["data"]["browse"] assert browse == { - "total": 2, - "entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }], - "groups": [], - "metadata": { "path": ["prod", "kafka2"], "totalNumEntities": 0 } + "total": 2, + "entities": [{"urn": TEST_DATASET_1_URN}, {"urn": TEST_DATASET_2_URN}], + "groups": [], + "metadata": {"path": ["prod", "kafka2"], "totalNumEntities": 0}, } - - diff --git a/smoke-test/tests/cli/datahub-cli.py b/smoke-test/tests/cli/datahub-cli.py index 1d0080bdd9d48..c3db6028efceb 100644 --- a/smoke-test/tests/cli/datahub-cli.py +++ b/smoke-test/tests/cli/datahub-cli.py @@ -1,8 +1,11 @@ import json -import pytest from time import sleep -from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity + +import pytest +from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type, + post_entity) from datahub.cli.ingest_cli import get_session_and_host, rollback + from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync ingested_dataset_run_id = "" @@ -24,24 +27,46 @@ def test_setup(): session, gms_host = get_session_and_host() - assert "browsePaths" not in 
get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) - ingested_dataset_run_id = ingest_file_via_rest("tests/cli/cli_test_data.json").config.run_id + ingested_dataset_run_id = ingest_file_via_rest( + "tests/cli/cli_test_data.json" + ).config.run_id print("Setup ingestion id: " + ingested_dataset_run_id) - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) yield # Clean up rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True})) - session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True} + ), + ) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True} + ), + ) - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) - assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) + assert "editableDatasetProperties" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) @pytest.mark.dependency() @@ -49,9 +74,7 @@ def test_rollback_editable(): global ingested_dataset_run_id global ingested_editable_run_id platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-rollback" - ) + dataset_name = "test-rollback" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -59,23 +82,38 @@ def test_rollback_editable(): print("Ingested dataset id:", ingested_dataset_run_id) # Assert that second data ingestion worked - assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) + assert "browsePaths" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + ) # Make editable change - ingested_editable_run_id = ingest_file_via_rest("tests/cli/cli_editable_test_data.json").config.run_id + ingested_editable_run_id = ingest_file_via_rest( + "tests/cli/cli_editable_test_data.json" + ).config.run_id print("ingested editable id:", ingested_editable_run_id) # Assert that second data ingestion worked - assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # rollback ingestion 1 rollback_url = f"{gms_host}/runs?action=rollback" - session.post(rollback_url, data=json.dumps({"runId": 
ingested_dataset_run_id, "dryRun": False, "hardDelete": False})) + session.post( + rollback_url, + data=json.dumps( + {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False} + ), + ) # Allow async MCP processor to handle ingestions & rollbacks wait_for_writes_to_sync() # EditableDatasetProperties should still be part of the entity that was soft deleted. - assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + assert "editableDatasetProperties" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False + ) # But first ingestion aspects should not be present - assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, typed=False) + assert "browsePaths" not in get_aspects_for_entity( + entity_urn=dataset_urn, typed=False + ) diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 16925d26f6983..17c8924fb0998 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,13 +1,11 @@ import pytest import tenacity from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import ( - delete_urns_from_file, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, -) +from datahub.metadata.schema_classes import (KafkaSchemaClass, + SchemaMetadataClass) + +from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, + ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 4288a61b7a0c1..106da7cd8d71e 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -1,21 +1,22 @@ import json import logging +import sys import tempfile import time -import sys from json import JSONDecodeError from typing import Any, Dict, List, Optional -from click.testing import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import pre_json_transform from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass + +import requests_wrapper as requests from tests.aspect_generators.timeseries.dataset_profile_gen import \ gen_dataset_profiles -from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync -import requests_wrapper as requests +from tests.utils import (get_strftime_from_timestamp_millis, + wait_for_writes_to_sync) logger = logging.getLogger(__name__) @@ -33,6 +34,7 @@ def sync_elastic() -> None: wait_for_writes_to_sync() + def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".json") as aspect_file: aspect_text: str = json.dumps(pre_json_transform(dataset_profile.to_obj())) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index 61e7a5a65b494..e962b1a5cafd6 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -2,14 +2,14 @@ import time from typing import Any, Dict, List, Optional -from click.testing 
import CliRunner, Result - import datahub.emitter.mce_builder as builder +from click.testing import CliRunner, Result from datahub.emitter.serialization_helper import post_json_transform from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync + import requests_wrapper as requests +from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 405e061c016f9..7b986d3be0444 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -1,6 +1,7 @@ import json import sys import tempfile +import time from typing import Any, Dict, Iterable, List import yaml @@ -8,7 +9,7 @@ from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub from datahub.ingestion.graph.client import DataHubGraph, get_default_graph -import time + import requests_wrapper as requests from tests.utils import wait_for_writes_to_sync diff --git a/smoke-test/tests/conftest.py b/smoke-test/tests/conftest.py index eed7a983197ef..57b92a2db1c19 100644 --- a/smoke-test/tests/conftest.py +++ b/smoke-test/tests/conftest.py @@ -2,8 +2,8 @@ import pytest -from tests.utils import wait_for_healthcheck_util, get_frontend_session from tests.test_result_msg import send_message +from tests.utils import get_frontend_session, wait_for_healthcheck_util # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -28,5 +28,5 @@ def test_healthchecks(wait_for_healthchecks): def pytest_sessionfinish(session, exitstatus): - """ whole test run finishes. """ + """whole test run finishes.""" send_message(exitstatus) diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 15993733c592b..607835bf3649c 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -1,10 +1,16 @@ -import time +import logging import os import subprocess +import time _ELASTIC_BUFFER_WRITES_TIME_IN_SEC: int = 1 USE_STATIC_SLEEP: bool = bool(os.getenv("USE_STATIC_SLEEP", False)) -ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int(os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5)) +ELASTICSEARCH_REFRESH_INTERVAL_SECONDS: int = int( + os.getenv("ELASTICSEARCH_REFRESH_INTERVAL_SECONDS", 5) +) + +logger = logging.getLogger(__name__) + def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: if USE_STATIC_SLEEP: @@ -30,7 +36,9 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: lag_zero = True if not lag_zero: - logger.warning(f"Exiting early from waiting for elastic to catch up due to a timeout. Current lag is {lag_values}") + logger.warning( + f"Exiting early from waiting for elastic to catch up due to a timeout. 
Current lag is {lag_values}" + ) else: # we want to sleep for an additional period of time for Elastic writes buffer to clear - time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) \ No newline at end of file + time.sleep(_ELASTIC_BUFFER_WRITES_TIME_IN_SEC) diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 575e3def6cf23..05a45239dabf8 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=False) diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index b3bacf39ac7ae..4ad2bc53fa87d 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -1,18 +1,16 @@ -from typing import Set, List - import datetime -import pytest -import subprocess import os +import subprocess +from typing import List, Set + +import pytest + +from tests.setup.lineage.ingest_time_lineage import (get_time_lineage_urns, + ingest_time_lineage) +from tests.utils import (create_datahub_step_state_aspects, delete_urns, + delete_urns_from_file, get_admin_username, + ingest_file_via_rest) -from tests.utils import ( - create_datahub_step_state_aspects, - get_admin_username, - ingest_file_via_rest, - delete_urns_from_file, - delete_urns, -) -from tests.setup.lineage.ingest_time_lineage import ingest_time_lineage, get_time_lineage_urns CYPRESS_TEST_DATA_DIR = "tests/cypress" TEST_DATA_FILENAME = "data.json" @@ -145,7 +143,6 @@ def ingest_cleanup_data(): delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}") delete_urns(get_time_lineage_urns()) - print_now() print("deleting onboarding data file") if os.path.exists(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}"): diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index db198098f21fa..baef1cb1cb3ba 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -1,4 +1,6 @@ +import logging import os +import subprocess import tempfile import time from random import randint @@ -17,8 +19,6 @@ DomainPropertiesClass, DomainsClass) from datahub.utilities.urns.urn import Urn -import subprocess -import logging logger = logging.getLogger(__name__) diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index 68e001f983fbf..d920faaf3a89a 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -1,16 +1,14 @@ -import os import json -import pytest +import os from time import sleep + +import pytest from datahub.cli.cli_utils import get_aspects_for_entity from datahub.cli.ingest_cli import get_session_and_host -from tests.utils import ( - ingest_file_via_rest, - wait_for_healthcheck_util, - delete_urns_from_file, - wait_for_writes_to_sync, - get_datahub_graph, -) + +from tests.utils import (delete_urns_from_file, get_datahub_graph, + ingest_file_via_rest, wait_for_healthcheck_util, + wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -102,7 +100,7 @@ def test_delete_reference(test_setup, depends=["test_healthchecks"]): 
graph.delete_references_to_urn(tag_urn, dry_run=False) wait_for_writes_to_sync() - + # Validate that references no longer exist references_count, related_aspects = graph.delete_references_to_urn( tag_urn, dry_run=True diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index 1149a970aa8e5..a8969804d03d7 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,10 +1,7 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - ingest_file_via_rest, - get_root_urn, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_root_urn, + ingest_file_via_rest) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 7ffe1682cafd8..fa8c918e3cbe1 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() @@ -240,4 +236,7 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): assert res_data assert res_data["data"]["dataset"]["domain"]["domain"]["urn"] == domain_urn - assert res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] == "Engineering" + assert ( + res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] + == "Engineering" + ) diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py index 1238a1dd5730a..b5e408731334e 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py @@ -3,7 +3,8 @@ import pytest import tenacity -from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util +from tests.utils import (get_frontend_url, get_sleep_info, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index 574e4fd4e4c88..f1d6abf5da794 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -2,25 +2,17 @@ import uuid from typing import Dict, Optional, Type -from datahub.emitter.mce_builder import ( - make_tag_urn, - make_term_urn, - make_user_urn, -) +from datahub.emitter.mce_builder import (make_tag_urn, make_term_urn, + make_user_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - AuditStampClass, - GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, - TagAssociationClass, - _Aspect, -) +from datahub.metadata.schema_classes import (AuditStampClass, GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, OwnerClass, + OwnershipClass, + OwnershipTypeClass, + TagAssociationClass, _Aspect) def helper_test_entity_terms_patch( @@ -34,18 +26,14 @@ def get_terms(graph, 
entity_urn): term_urn = make_term_urn(term=f"testTerm-{uuid.uuid4()}") - term_association = GlossaryTermAssociationClass( - urn=term_urn, context="test" - ) + term_association = GlossaryTermAssociationClass(urn=term_urn, context="test") global_terms = GlossaryTermsClass( terms=[term_association], auditStamp=AuditStampClass( time=int(time.time() * 1000.0), actor=make_user_urn("tester") ), ) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_terms - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_terms) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -88,9 +76,7 @@ def helper_test_dataset_tags_patch( tag_association = TagAssociationClass(tag=tag_urn, context="test") global_tags = GlobalTagsClass(tags=[tag_association]) - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=global_tags - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_tags) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) @@ -153,15 +139,11 @@ def helper_test_ownership_patch( assert owner.owners[0].owner == make_user_urn("jdoe") for patch_mcp in ( - patch_builder_class(test_entity_urn) - .add_owner(owner_to_add) - .build() + patch_builder_class(test_entity_urn).add_owner(owner_to_add).build() ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 2 for patch_mcp in ( @@ -171,9 +153,7 @@ def helper_test_ownership_patch( ): graph.emit_mcp(patch_mcp) - owner = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) assert len(owner.owners) == 1 assert owner.owners[0].owner == make_user_urn("jdoe") @@ -199,9 +179,7 @@ def get_custom_properties( orig_aspect = base_aspect assert hasattr(orig_aspect, "customProperties") orig_aspect.customProperties = base_property_map - mcpw = MetadataChangeProposalWrapper( - entityUrn=test_entity_urn, aspect=orig_aspect - ) + mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=orig_aspect) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit(mcpw) diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index 407410ee89914..342d5d683228a 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -3,19 +3,14 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import ( - DataJobInfoClass, - DataJobInputOutputClass, - EdgeClass, -) +from datahub.metadata.schema_classes import (DataJobInfoClass, + DataJobInputOutputClass, + EdgeClass) from datahub.specific.datajob import DataJobPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, - helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, - helper_test_ownership_patch, -) + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) def _make_test_datajob_urn( @@ -37,16 +32,12 @@ def test_datajob_ownership_patch(wait_for_healthchecks): # Tags def 
test_datajob_tags_patch(wait_for_healthchecks): - helper_test_dataset_tags_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_dataset_tags_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Terms def test_dataset_terms_patch(wait_for_healthchecks): - helper_test_entity_terms_patch( - _make_test_datajob_urn(), DataJobPatchBuilder - ) + helper_test_entity_terms_patch(_make_test_datajob_urn(), DataJobPatchBuilder) # Custom Properties diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 239aab64675d8..6704d19760fb9 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -20,7 +20,10 @@ UpstreamClass, UpstreamLineageClass) from datahub.specific.dataset import DatasetPatchBuilder -from tests.patch.common_patch_tests import helper_test_entity_terms_patch, helper_test_dataset_tags_patch, helper_test_ownership_patch, helper_test_custom_properties_patch + +from tests.patch.common_patch_tests import ( + helper_test_custom_properties_patch, helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, helper_test_ownership_patch) # Common Aspect Patch Tests @@ -31,6 +34,7 @@ def test_dataset_ownership_patch(wait_for_healthchecks): ) helper_test_ownership_patch(dataset_urn, DatasetPatchBuilder) + # Tags def test_dataset_tags_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -38,6 +42,7 @@ def test_dataset_tags_patch(wait_for_healthchecks): ) helper_test_dataset_tags_patch(dataset_urn, DatasetPatchBuilder) + # Terms def test_dataset_terms_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( @@ -284,8 +289,15 @@ def test_custom_properties_patch(wait_for_healthchecks): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) - orig_dataset_properties = DatasetPropertiesClass(name="test_name", description="test_description") - helper_test_custom_properties_patch(test_entity_urn=dataset_urn, patch_builder_class=DatasetPatchBuilder, custom_properties_aspect_class=DatasetPropertiesClass, base_aspect=orig_dataset_properties) + orig_dataset_properties = DatasetPropertiesClass( + name="test_name", description="test_description" + ) + helper_test_custom_properties_patch( + test_entity_urn=dataset_urn, + patch_builder_class=DatasetPatchBuilder, + custom_properties_aspect_class=DatasetPropertiesClass, + base_aspect=orig_dataset_properties, + ) with DataHubGraph(DataHubGraphConfig()) as graph: # Patch custom properties along with name diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index b7091541894dd..67142181d2b96 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,12 +1,8 @@ import pytest import tenacity -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_frontend_session, - get_sleep_info, - get_root_urn, -) + +from tests.utils import (get_frontend_session, get_frontend_url, get_root_urn, + get_sleep_info, wait_for_healthcheck_util) TEST_POLICY_NAME = "Updated Platform Policy" diff --git a/smoke-test/tests/setup/lineage/helper_classes.py b/smoke-test/tests/setup/lineage/helper_classes.py index 53f77b08d15ed..d550f3093be85 100644 --- a/smoke-test/tests/setup/lineage/helper_classes.py +++ b/smoke-test/tests/setup/lineage/helper_classes.py @@ -1,10 +1,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -from 
datahub.metadata.schema_classes import ( - EdgeClass, - SchemaFieldDataTypeClass, -) +from datahub.metadata.schema_classes import EdgeClass, SchemaFieldDataTypeClass @dataclass diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 8e3e9c5352922..588a1625419bc 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - DateTypeClass, - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) +from datahub.metadata.schema_classes import (DateTypeClass, NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 2 DAILY_TEMPERATURE_DATASET_ID = "climate.daily_temperature" diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index 35a8e6d5cf02e..bb9f51b6b5e9b 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -1,32 +1,18 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, -) +from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, - UpstreamClass, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, UpstreamClass) -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, -) -from tests.setup.lineage.utils import ( - create_node, - create_upstream_edge, - create_upstream_mcp, - emit_mcps, -) +from tests.setup.lineage.constants import (DATASET_ENTITY_TYPE, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field +from tests.setup.lineage.utils import (create_node, create_upstream_edge, + create_upstream_mcp, emit_mcps) # Constants for Case 3 GDP_DATASET_ID = "economic_data.gdp" diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py 
index f4fb795147478..6079d7a3d2b63 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -1,36 +1,20 @@ from typing import List -from datahub.emitter.mce_builder import ( - make_dataset_urn, - make_data_flow_urn, - make_data_job_urn_with_flow, -) +from datahub.emitter.mce_builder import (make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import ( - NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, -) - -from tests.setup.lineage.constants import ( - AIRFLOW_DATA_PLATFORM, - BQ_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO, -) -from tests.setup.lineage.helper_classes import ( - Field, - Dataset, - Task, - Pipeline, -) -from tests.setup.lineage.utils import ( - create_edge, - create_node, - create_nodes_and_edges, - emit_mcps, -) +from datahub.metadata.schema_classes import (NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass) +from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, + BQ_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task +from tests.setup.lineage.utils import (create_edge, create_node, + create_nodes_and_edges, emit_mcps) # Constants for Case 1 TRANSACTIONS_DATASET_ID = "transactions.transactions" diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index cae8e0124d501..3aec979707290 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -1,12 +1,14 @@ +import os from typing import List from datahub.emitter.rest_emitter import DatahubRestEmitter -from tests.setup.lineage.ingest_input_datasets_change import ingest_input_datasets_change, get_input_datasets_change_urns -from tests.setup.lineage.ingest_data_job_change import ingest_data_job_change, get_data_job_change_urns -from tests.setup.lineage.ingest_dataset_join_change import ingest_dataset_join_change, get_dataset_join_change_urns - -import os +from tests.setup.lineage.ingest_data_job_change import ( + get_data_job_change_urns, ingest_data_job_change) +from tests.setup.lineage.ingest_dataset_join_change import ( + get_dataset_join_change_urns, ingest_dataset_join_change) +from tests.setup.lineage.ingest_input_datasets_change import ( + get_input_datasets_change_urns, ingest_input_datasets_change) SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" TOKEN = os.getenv("DATAHUB_TOKEN") or "" @@ -20,4 +22,8 @@ def ingest_time_lineage() -> None: def get_time_lineage_urns() -> List[str]: - return get_input_datasets_change_urns() + get_data_job_change_urns() + get_dataset_join_change_urns() + return ( + get_input_datasets_change_urns() + + get_data_job_change_urns() + + get_dataset_join_change_urns() + ) diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index 672f7a945a6af..c72f6ccb89b7a 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -1,41 +1,30 @@ import datetime -from datahub.emitter.mce_builder import ( - make_data_platform_urn, - make_dataset_urn, - make_data_job_urn_with_flow, - make_data_flow_urn, -) +from typing import List + +from datahub.emitter.mce_builder import (make_data_flow_urn, 
+ make_data_job_urn_with_flow, + make_data_platform_urn, + make_dataset_urn) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage -from datahub.metadata.schema_classes import ( - AuditStampClass, - ChangeTypeClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, - DataFlowInfoClass, - DataJobInputOutputClass, - DataJobInfoClass, - EdgeClass, - MySqlDDLClass, - SchemaFieldClass, - SchemaMetadataClass, - UpstreamClass, -) -from typing import List - -from tests.setup.lineage.constants import ( - DATASET_ENTITY_TYPE, - DATA_JOB_ENTITY_TYPE, - DATA_FLOW_ENTITY_TYPE, - DATA_FLOW_INFO_ASPECT_NAME, - DATA_JOB_INFO_ASPECT_NAME, - DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, -) -from tests.setup.lineage.helper_classes import ( - Dataset, - Pipeline, -) +from datahub.metadata.schema_classes import (AuditStampClass, ChangeTypeClass, + DataFlowInfoClass, + DataJobInfoClass, + DataJobInputOutputClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, EdgeClass, + MySqlDDLClass, SchemaFieldClass, + SchemaMetadataClass, + UpstreamClass) + +from tests.setup.lineage.constants import (DATA_FLOW_ENTITY_TYPE, + DATA_FLOW_INFO_ASPECT_NAME, + DATA_JOB_ENTITY_TYPE, + DATA_JOB_INFO_ASPECT_NAME, + DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, + DATASET_ENTITY_TYPE) +from tests.setup.lineage.helper_classes import Dataset, Pipeline def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: @@ -85,10 +74,10 @@ def create_node(dataset: Dataset) -> List[MetadataChangeProposalWrapper]: def create_edge( - source_urn: str, - destination_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + source_urn: str, + destination_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ) -> EdgeClass: created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -105,7 +94,7 @@ def create_edge( def create_nodes_and_edges( - airflow_dag: Pipeline, + airflow_dag: Pipeline, ) -> List[MetadataChangeProposalWrapper]: mcps = [] data_flow_urn = make_data_flow_urn( @@ -160,9 +149,9 @@ def create_nodes_and_edges( def create_upstream_edge( - upstream_entity_urn: str, - created_timestamp_millis: int, - updated_timestamp_millis: int, + upstream_entity_urn: str, + created_timestamp_millis: int, + updated_timestamp_millis: int, ): created_audit_stamp: AuditStampClass = AuditStampClass( time=created_timestamp_millis, actor="urn:li:corpuser:unknown" @@ -180,11 +169,11 @@ def create_upstream_edge( def create_upstream_mcp( - entity_type: str, - entity_urn: str, - upstreams: List[UpstreamClass], - timestamp_millis: int, - run_id: str = "", + entity_type: str, + entity_urn: str, + upstreams: List[UpstreamClass], + timestamp_millis: int, + run_id: str = "", ) -> MetadataChangeProposalWrapper: print(f"Creating upstreamLineage aspect for {entity_urn}") timestamp_millis: int = int(datetime.datetime.now().timestamp() * 1000) @@ -203,7 +192,7 @@ def create_upstream_mcp( def emit_mcps( - emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] + emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] ) -> None: for mcp in mcps: emitter.emit_mcp(mcp) diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py index b0ca29b544cfe..6ac75765286f0 100644 --- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py +++ 
b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py @@ -1,5 +1,7 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util + +from tests.utils import (delete_urns_from_file, get_frontend_url, + ingest_file_via_rest, wait_for_healthcheck_util) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py index 3672abcda948d..3127061c9f506 100644 --- a/smoke-test/tests/telemetry/telemetry_test.py +++ b/smoke-test/tests/telemetry/telemetry_test.py @@ -7,5 +7,7 @@ def test_no_clientID(): client_id_urn = "urn:li:telemetry:clientId" aspect = ["telemetryClientId"] - res_data = json.dumps(get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False)) + res_data = json.dumps( + get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False) + ) assert res_data == "{}" diff --git a/smoke-test/tests/test_result_msg.py b/smoke-test/tests/test_result_msg.py index e3b336db9d66c..b9775e8ee4acd 100644 --- a/smoke-test/tests/test_result_msg.py +++ b/smoke-test/tests/test_result_msg.py @@ -1,6 +1,6 @@ -from slack_sdk import WebClient import os +from slack_sdk import WebClient datahub_stats = {} @@ -10,10 +10,10 @@ def add_datahub_stats(stat_name, stat_val): def send_to_slack(passed: str): - slack_api_token = os.getenv('SLACK_API_TOKEN') - slack_channel = os.getenv('SLACK_CHANNEL') - slack_thread_ts = os.getenv('SLACK_THREAD_TS') - test_identifier = os.getenv('TEST_IDENTIFIER', 'LOCAL_TEST') + slack_api_token = os.getenv("SLACK_API_TOKEN") + slack_channel = os.getenv("SLACK_CHANNEL") + slack_thread_ts = os.getenv("SLACK_THREAD_TS") + test_identifier = os.getenv("TEST_IDENTIFIER", "LOCAL_TEST") if slack_api_token is None or slack_channel is None: return client = WebClient(token=slack_api_token) @@ -26,14 +26,21 @@ def send_to_slack(passed: str): message += f"Num {entity_type} is {val}\n" if slack_thread_ts is None: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}') + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + ) else: - client.chat_postMessage(channel=slack_channel, text=f'{test_identifier} Status - {passed}\n{message}', thread_ts=slack_thread_ts) + client.chat_postMessage( + channel=slack_channel, + text=f"{test_identifier} Status - {passed}\n{message}", + thread_ts=slack_thread_ts, + ) def send_message(exitstatus): try: - send_to_slack('PASSED' if exitstatus == 0 else 'FAILED') + send_to_slack("PASSED" if exitstatus == 0 else "FAILED") except Exception as e: # We don't want to fail pytest at all print(f"Exception happened for sending msg to slack {e}") diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index a10cf13a08029..c6adb402e5d51 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -4,17 +4,15 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource from datahub.ingestion.source.state.checkpoint import Checkpoint -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState -from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalHandler +from datahub.ingestion.source.state.entity_removal_state import \ + GenericCheckpointState +from 
datahub.ingestion.source.state.stale_entity_removal_handler import \ + StaleEntityRemovalHandler from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import ( - get_gms_url, - get_mysql_password, - get_mysql_url, - get_mysql_username, -) +from tests.utils import (get_gms_url, get_mysql_password, get_mysql_url, + get_mysql_username) def test_stateful_ingestion(wait_for_healthchecks): diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 0b87f90a92c58..213a2ea087b7a 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,9 +1,13 @@ import pytest import tenacity -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info + +from tests.utils import (delete_urns_from_file, get_frontend_url, + get_sleep_info, ingest_file_via_rest, + wait_for_healthcheck_util) sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(request): print("ingesting test data") @@ -18,6 +22,7 @@ def wait_for_healthchecks(): wait_for_healthcheck_util() yield + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py index a73d585c6c72d..4705343c1a2ba 100644 --- a/smoke-test/tests/timeline/timeline_test.py +++ b/smoke-test/tests/timeline/timeline_test.py @@ -3,14 +3,14 @@ from datahub.cli import timeline_cli from datahub.cli.cli_utils import guess_entity_type, post_entity -from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync, get_datahub_graph + +from tests.utils import (get_datahub_graph, ingest_file_via_rest, + wait_for_writes_to_sync) def test_all(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -18,8 +18,13 @@ def test_all(): ingest_file_via_rest("tests/timeline/timeline_test_datav2.json") ingest_file_via_rest("tests/timeline/timeline_test_datav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", - "OWNER"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, + ["TAG", "DOCUMENTATION", "TECHNICAL_SCHEMA", "GLOSSARY_TERM", "OWNER"], + None, + None, + False, + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -35,9 +40,7 @@ def test_all(): def test_schema(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -45,7 +48,9 @@ def test_schema(): put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav2.json") put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -61,9 +66,7 @@ def test_schema(): def test_glossary(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = 
"test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -71,7 +74,9 @@ def test_glossary(): put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv2.json") put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv3.json") - res_data = timeline_cli.get_timeline(dataset_urn, ["GLOSSARY_TERM"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["GLOSSARY_TERM"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -87,17 +92,29 @@ def test_glossary(): def test_documentation(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentation.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv2.json") - put(dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv3.json") + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentation.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv2.json", + ) + put( + dataset_urn, + "institutionalMemory", + "test_resources/timeline/newdocumentationv3.json", + ) - res_data = timeline_cli.get_timeline(dataset_urn, ["DOCUMENTATION"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["DOCUMENTATION"], None, None, False + ) get_datahub_graph().hard_delete_entity(urn=dataset_urn) assert res_data @@ -113,9 +130,7 @@ def test_documentation(): def test_tags(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" @@ -139,9 +154,7 @@ def test_tags(): def test_ownership(): platform = "urn:li:dataPlatform:kafka" - dataset_name = ( - "test-timeline-sample-kafka" - ) + dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index b10ad3aa3fc2a..55f3de594af4e 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -1,15 +1,11 @@ import os -import pytest -import requests from time import sleep -from tests.utils import ( - get_frontend_url, - wait_for_healthcheck_util, - get_admin_credentials, - wait_for_writes_to_sync, -) +import pytest +import requests +from tests.utils import (get_admin_credentials, get_frontend_url, + wait_for_healthcheck_util, wait_for_writes_to_sync) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index af03efd4f71f8..bd75b13d1910f 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -1,19 +1,20 @@ import functools import json +import logging import os -from datetime import datetime, timedelta, timezone import subprocess import time -from typing import Any, Dict, List, Tuple +from datetime import datetime, timedelta, timezone from time import sleep -from joblib import Parallel, delayed +from typing import Any, Dict, List, Tuple -import requests_wrapper as requests -import 
logging from datahub.cli import cli_utils from datahub.cli.cli_utils import get_system_auth -from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.ingestion.run.pipeline import Pipeline +from joblib import Parallel, delayed + +import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync TIME: int = 1581407189000 @@ -174,6 +175,7 @@ def delete(entry): wait_for_writes_to_sync() + # Fixed now value NOW: datetime = datetime.now() @@ -232,6 +234,3 @@ def create_datahub_step_state_aspects( ] with open(onboarding_filename, "w") as f: json.dump(aspects_dict, f, indent=2) - - - diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index 4da69750a167b..685c3bd80b04d 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,16 +1,14 @@ -import pytest import time + +import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, - get_sleep_info, -) + +from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, + get_sleep_info, ingest_file_via_rest) sleep_sec, sleep_times = get_sleep_info() + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. @@ -40,6 +38,7 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou assert after_count == before_count + 1 return after_count + @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) @@ -111,18 +110,18 @@ def test_create_list_delete_global_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -137,7 +136,7 @@ def test_create_list_delete_global_view(frontend_session): "viewType": "GLOBAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -169,9 +168,7 @@ def test_create_list_delete_global_view(frontend_session): "query": """mutation deleteView($urn: String!) 
{\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -189,7 +186,9 @@ def test_create_list_delete_global_view(frontend_session): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_global_view"]) +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_global_view"] +) def test_create_list_delete_personal_view(frontend_session): # Get count of existing views @@ -237,18 +236,18 @@ def test_create_list_delete_personal_view(frontend_session): new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -263,7 +262,7 @@ def test_create_list_delete_personal_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -293,9 +292,7 @@ def test_create_list_delete_personal_view(frontend_session): "query": """mutation deleteView($urn: String!) {\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( @@ -312,25 +309,28 @@ def test_create_list_delete_personal_view(frontend_session): before_count=new_count, ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_delete_personal_view"]) + +@pytest.mark.dependency( + depends=["test_healthchecks", "test_create_list_delete_personal_view"] +) def test_update_global_view(frontend_session): # First create a view new_view_name = "Test View" new_view_description = "Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD"], - "filter": { - "operator": "AND", - "filters": [ - { - "field": "tags", - "values": ["urn:li:tag:test"], - "negated": False, - "condition": "EQUAL" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD"], + "filter": { + "operator": "AND", + "filters": [ + { + "field": "tags", + "values": ["urn:li:tag:test"], + "negated": False, + "condition": "EQUAL", + } + ], + }, } # Create new View @@ -345,7 +345,7 @@ def test_update_global_view(frontend_session): "viewType": "PERSONAL", "name": new_view_name, "description": new_view_description, - "definition": new_view_definition + "definition": new_view_definition, } }, } @@ -366,18 +366,18 @@ def test_update_global_view(frontend_session): new_view_name = "New Test View" new_view_description = "New Test Description" new_view_definition = { - "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], - "filter": { - "operator": "OR", - "filters": [ - { - "field": "glossaryTerms", - "values": ["urn:li:glossaryTerm:test"], - "negated": True, - "condition": "CONTAIN" - } - ] - } + "entityTypes": ["DATASET", "DASHBOARD", "CHART", "DATA_FLOW"], + "filter": { + "operator": "OR", + "filters": [ + { + "field": "glossaryTerms", + "values": ["urn:li:glossaryTerm:test"], + "negated": True, + "condition": "CONTAIN", + } + ], + }, } update_view_json = { @@ -391,8 +391,8 @@ def test_update_global_view(frontend_session): "input": { "name": new_view_name, 
"description": new_view_description, - "definition": new_view_definition - } + "definition": new_view_definition, + }, }, } @@ -411,9 +411,7 @@ def test_update_global_view(frontend_session): "query": """mutation deleteView($urn: String!) {\n deleteView(urn: $urn) }""", - "variables": { - "urn": view_urn - }, + "variables": {"urn": view_urn}, } response = frontend_session.post( From 6ecdeda5ff590456c6bfadfa5c37821f7281169e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 10 Oct 2023 16:28:40 +0530 Subject: [PATCH 108/156] fix(setup): drop older table if exists (#8979) --- docker/mariadb/init.sql | 2 ++ docker/mysql-setup/init.sql | 2 ++ docker/mysql/init.sql | 2 ++ docker/postgres-setup/init.sql | 2 ++ docker/postgres/init.sql | 2 ++ 5 files changed, 10 insertions(+) diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql index c4132575cf442..95c8cabbc5ca4 100644 --- a/docker/mariadb/init.sql +++ b/docker/mariadb/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql index 2370a971941d2..b789329ddfd17 100644 --- a/docker/mysql-setup/init.sql +++ b/docker/mysql-setup/init.sql @@ -39,3 +39,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/mysql/init.sql b/docker/mysql/init.sql index b4b4e4617806c..aca57d7cd444c 100644 --- a/docker/mysql/init.sql +++ b/docker/mysql/init.sql @@ -27,3 +27,5 @@ INSERT INTO metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres-setup/init.sql b/docker/postgres-setup/init.sql index 12fff7aec7fe6..72b2f73192e00 100644 --- a/docker/postgres-setup/init.sql +++ b/docker/postgres-setup/init.sql @@ -35,3 +35,5 @@ INSERT INTO metadata_aspect_v2 SELECT * FROM temp_metadata_aspect_v2 WHERE NOT EXISTS (SELECT * from metadata_aspect_v2); DROP TABLE temp_metadata_aspect_v2; + +DROP TABLE IF EXISTS metadata_index; diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql index cf477c135422e..87c8dd3337fac 100644 --- a/docker/postgres/init.sql +++ b/docker/postgres/init.sql @@ -28,3 +28,5 @@ insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, creat now(), 'urn:li:corpuser:__datahub_system' ); + +DROP TABLE IF EXISTS metadata_index; From 1a72fa499c3404c6c3d2961e9575495f2dd021d2 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 10 Oct 2023 17:34:06 -0400 Subject: [PATCH 109/156] feat(ingest/tableau): Allow parsing of database name from fullName (#8981) --- .../src/datahub/ingestion/source/tableau.py | 74 ++------ .../ingestion/source/tableau_common.py | 162 +++++++++++++----- .../tableau/test_tableau_ingest.py | 34 ++-- 3 files changed, 151 insertions(+), 119 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index e347cd26d245a..bad7ae49d325e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -77,6 +77,7 @@ FIELD_TYPE_MAPPING, MetadataQueryException, TableauLineageOverrides, + TableauUpstreamReference, clean_query, custom_sql_graphql_query, 
dashboard_graphql_query, @@ -85,7 +86,6 @@ get_overridden_info, get_unique_custom_sql, make_fine_grained_lineage_class, - make_table_urn, make_upstream_class, published_datasource_graphql_query, query_metadata, @@ -271,7 +271,7 @@ class TableauConfig( "You can change this if your Tableau projects contain slashes in their names, and you'd like to filter by project.", ) - default_schema_map: dict = Field( + default_schema_map: Dict[str, str] = Field( default={}, description="Default schema to use when schema is not found." ) ingest_tags: Optional[bool] = Field( @@ -997,41 +997,16 @@ def get_upstream_tables( ) continue - schema = table.get(tableau_constant.SCHEMA) or "" - table_name = table.get(tableau_constant.NAME) or "" - full_name = table.get(tableau_constant.FULL_NAME) or "" - upstream_db = ( - table[tableau_constant.DATABASE][tableau_constant.NAME] - if table.get(tableau_constant.DATABASE) - and table[tableau_constant.DATABASE].get(tableau_constant.NAME) - else "" - ) - logger.debug( - "Processing Table with Connection Type: {0} and id {1}".format( - table.get(tableau_constant.CONNECTION_TYPE) or "", - table.get(tableau_constant.ID) or "", + try: + ref = TableauUpstreamReference.create( + table, default_schema_map=self.config.default_schema_map ) - ) - schema = self._get_schema(schema, upstream_db, full_name) - # if the schema is included within the table name we omit it - if ( - schema - and table_name - and full_name - and table_name == full_name - and schema in table_name - ): - logger.debug( - f"Omitting schema for upstream table {table[tableau_constant.ID]}, schema included in table name" - ) - schema = "" + except Exception as e: + logger.info(f"Failed to generate upstream reference for {table}: {e}") + continue - table_urn = make_table_urn( + table_urn = ref.make_dataset_urn( self.config.env, - upstream_db, - table.get(tableau_constant.CONNECTION_TYPE) or "", - schema, - table_name, self.config.platform_instance_map, self.config.lineage_overrides, ) @@ -1052,7 +1027,7 @@ def get_upstream_tables( urn=table_urn, id=table[tableau_constant.ID], num_cols=num_tbl_cols, - paths=set([table_path]) if table_path else set(), + paths={table_path} if table_path else set(), ) else: self.database_tables[table_urn].update_table( @@ -2462,35 +2437,6 @@ def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]: is_embedded_ds=True, ) - @lru_cache(maxsize=None) - def _get_schema(self, schema_provided: str, database: str, fullName: str) -> str: - # For some databases, the schema attribute in tableau api does not return - # correct schema name for the table. For more information, see - # https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_model.html#schema_attribute. 
- # Hence we extract schema from fullName whenever fullName is available - schema = self._extract_schema_from_fullName(fullName) if fullName else "" - if not schema: - schema = schema_provided - elif schema != schema_provided: - logger.debug( - "Correcting schema, provided {0}, corrected {1}".format( - schema_provided, schema - ) - ) - - if not schema and database in self.config.default_schema_map: - schema = self.config.default_schema_map[database] - - return schema - - @lru_cache(maxsize=None) - def _extract_schema_from_fullName(self, fullName: str) -> str: - # fullName is observed to be in format [schemaName].[tableName] - # OR simply tableName OR [tableName] - if fullName.startswith("[") and "].[" in fullName: - return fullName[1 : fullName.index("]")] - return "" - @lru_cache(maxsize=None) def get_last_modified( self, creator: Optional[str], created_at: bytes, updated_at: bytes diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 2c92285fdba77..7c4852042ce7c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -1,4 +1,6 @@ import html +import logging +from dataclasses import dataclass from functools import lru_cache from typing import Dict, List, Optional, Tuple @@ -6,6 +8,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel +from datahub.ingestion.source import tableau_constant as tc from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, FineGrainedLineage, @@ -31,6 +34,8 @@ ) from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +logger = logging.getLogger(__name__) + class TableauLineageOverrides(ConfigModel): platform_override_map: Optional[Dict[str, str]] = Field( @@ -537,12 +542,12 @@ def get_fully_qualified_table_name( platform: str, upstream_db: str, schema: str, - full_name: str, + table_name: str, ) -> str: if platform == "athena": upstream_db = "" database_name = f"{upstream_db}." if upstream_db else "" - final_name = full_name.replace("[", "").replace("]", "") + final_name = table_name.replace("[", "").replace("]", "") schema_name = f"{schema}." 
if schema else "" @@ -573,17 +578,123 @@ def get_fully_qualified_table_name( return fully_qualified_table_name -def get_platform_instance( - platform: str, platform_instance_map: Optional[Dict[str, str]] -) -> Optional[str]: - if platform_instance_map is not None and platform in platform_instance_map.keys(): - return platform_instance_map[platform] +@dataclass +class TableauUpstreamReference: + database: Optional[str] + schema: Optional[str] + table: str + + connection_type: str + + @classmethod + def create( + cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None + ) -> "TableauUpstreamReference": + # Values directly from `table` object from Tableau + database = t_database = d.get(tc.DATABASE, {}).get(tc.NAME) + schema = t_schema = d.get(tc.SCHEMA) + table = t_table = d.get(tc.NAME) or "" + t_full_name = d.get(tc.FULL_NAME) + t_connection_type = d[tc.CONNECTION_TYPE] # required to generate urn + t_id = d[tc.ID] + + parsed_full_name = cls.parse_full_name(t_full_name) + if parsed_full_name and len(parsed_full_name) == 3: + database, schema, table = parsed_full_name + elif parsed_full_name and len(parsed_full_name) == 2: + schema, table = parsed_full_name + else: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" Did not parse full name {t_full_name}: unexpected number of values", + ) + + if not schema and default_schema_map and database in default_schema_map: + schema = default_schema_map[database] + + if database != t_database: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing database {t_database} with {database} from full name {t_full_name}" + ) + if schema != t_schema: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing schema {t_schema} with {schema} from full name {t_full_name}" + ) + if table != t_table: + logger.debug( + f"Upstream urn generation ({t_id}):" + f" replacing table {t_table} with {table} from full name {t_full_name}" + ) + + # TODO: See if we can remove this -- made for redshift + if ( + schema + and t_table + and t_full_name + and t_table == t_full_name + and schema in t_table + ): + logger.debug( + f"Omitting schema for upstream table {t_id}, schema included in table name" + ) + schema = "" + + return cls( + database=database, + schema=schema, + table=table, + connection_type=t_connection_type, + ) + + @staticmethod + def parse_full_name(full_name: Optional[str]) -> Optional[List[str]]: + # fullName is observed to be in formats: + # [database].[schema].[table] + # [schema].[table] + # [table] + # table + # schema + + # TODO: Validate the startswith check. 
Currently required for our integration tests + if full_name is None or not full_name.startswith("["): + return None + + return full_name.replace("[", "").replace("]", "").split(".") + + def make_dataset_urn( + self, + env: str, + platform_instance_map: Optional[Dict[str, str]], + lineage_overrides: Optional[TableauLineageOverrides] = None, + ) -> str: + ( + upstream_db, + platform_instance, + platform, + original_platform, + ) = get_overridden_info( + connection_type=self.connection_type, + upstream_db=self.database, + lineage_overrides=lineage_overrides, + platform_instance_map=platform_instance_map, + ) + + table_name = get_fully_qualified_table_name( + original_platform, + upstream_db or "", + self.schema, + self.table, + ) - return None + return builder.make_dataset_urn_with_platform_instance( + platform, table_name, platform_instance, env + ) def get_overridden_info( - connection_type: str, + connection_type: Optional[str], upstream_db: Optional[str], platform_instance_map: Optional[Dict[str, str]], lineage_overrides: Optional[TableauLineageOverrides] = None, @@ -605,7 +716,9 @@ def get_overridden_info( ): upstream_db = lineage_overrides.database_override_map[upstream_db] - platform_instance = get_platform_instance(original_platform, platform_instance_map) + platform_instance = ( + platform_instance_map.get(original_platform) if platform_instance_map else None + ) if original_platform in ("athena", "hive", "mysql"): # Two tier databases upstream_db = None @@ -613,35 +726,6 @@ def get_overridden_info( return upstream_db, platform_instance, platform, original_platform -def make_table_urn( - env: str, - upstream_db: Optional[str], - connection_type: str, - schema: str, - full_name: str, - platform_instance_map: Optional[Dict[str, str]], - lineage_overrides: Optional[TableauLineageOverrides] = None, -) -> str: - - upstream_db, platform_instance, platform, original_platform = get_overridden_info( - connection_type=connection_type, - upstream_db=upstream_db, - lineage_overrides=lineage_overrides, - platform_instance_map=platform_instance_map, - ) - - table_name = get_fully_qualified_table_name( - original_platform, - upstream_db if upstream_db is not None else "", - schema, - full_name, - ) - - return builder.make_dataset_urn_with_platform_instance( - platform, table_name, platform_instance, env - ) - - def make_description_from_params(description, formula): """ Generate column description diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index c31867f5aa904..0510f4a40f659 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -20,7 +20,7 @@ from datahub.ingestion.source.tableau import TableauConfig, TableauSource from datahub.ingestion.source.tableau_common import ( TableauLineageOverrides, - make_table_urn, + TableauUpstreamReference, ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, @@ -546,13 +546,13 @@ def test_lineage_overrides(): enable_logging() # Simple - specify platform instance to presto table assert ( - make_table_urn( - DEFAULT_ENV, + TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", - platform_instance_map={"presto": "my_presto_instance"}, + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_presto_instance"} ) == 
"urn:li:dataset:(urn:li:dataPlatform:presto,my_presto_instance.presto_catalog.test-schema.test-table,PROD)" ) @@ -560,12 +560,13 @@ def test_lineage_overrides(): # Transform presto urn to hive urn # resulting platform instance for hive = mapped platform instance + presto_catalog assert ( - make_table_urn( - DEFAULT_ENV, + TableauUpstreamReference( "presto_catalog", - "presto", "test-schema", - "presto_catalog.test-schema.test-table", + "test-table", + "presto", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"presto": "my_instance"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"presto": "hive"}, @@ -574,14 +575,15 @@ def test_lineage_overrides(): == "urn:li:dataset:(urn:li:dataPlatform:hive,my_instance.presto_catalog.test-schema.test-table,PROD)" ) - # tranform hive urn to presto urn + # transform hive urn to presto urn assert ( - make_table_urn( - DEFAULT_ENV, - "", - "hive", + TableauUpstreamReference( + None, "test-schema", - "test-schema.test-table", + "test-table", + "hive", + ).make_dataset_urn( + env=DEFAULT_ENV, platform_instance_map={"hive": "my_presto_instance.presto_catalog"}, lineage_overrides=TableauLineageOverrides( platform_override_map={"hive": "presto"}, From e2988017c23270acd95e25ec3289983ecc3895f7 Mon Sep 17 00:00:00 2001 From: Amanda Hernando <110099762+amanda-her@users.noreply.github.com> Date: Wed, 11 Oct 2023 01:36:01 +0200 Subject: [PATCH 110/156] feat(auth): add data platform instance field resolver provider (#8828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sergio Gómez Villamor Co-authored-by: Adrián Pertíñez --- .../authorization/ResolvedResourceSpec.java | 17 ++ .../authorization/ResourceFieldType.java | 6 +- .../DefaultResourceSpecResolver.java | 9 +- ...PlatformInstanceFieldResolverProvider.java | 70 +++++++ ...formInstanceFieldResolverProviderTest.java | 188 ++++++++++++++++++ 5 files changed, 286 insertions(+), 4 deletions(-) create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java index 53dd0be44f963..8e429a8ca1b94 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java @@ -3,6 +3,7 @@ import java.util.Collections; import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; @@ -35,4 +36,20 @@ public Set getOwners() { } return fieldResolvers.get(ResourceFieldType.OWNER).getFieldValuesFuture().join().getValues(); } + + /** + * Fetch the platform instance for a Resolved Resource Spec + * @return a Platform Instance or null if one does not exist. 
+ */ + @Nullable + public String getDataPlatformInstance() { + if (!fieldResolvers.containsKey(ResourceFieldType.DATA_PLATFORM_INSTANCE)) { + return null; + } + Set dataPlatformInstance = fieldResolvers.get(ResourceFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); + if (dataPlatformInstance.size() > 0) { + return dataPlatformInstance.stream().findFirst().get(); + } + return null; + } } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java index ee54d2bfbba1d..478522dc7c331 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java @@ -19,5 +19,9 @@ public enum ResourceFieldType { /** * Domains of resource */ - DOMAIN + DOMAIN, + /** + * Data platform instance of resource + */ + DATA_PLATFORM_INSTANCE } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java index cd4e0b0967829..64c43dc8aa591 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java @@ -1,13 +1,15 @@ package com.datahub.authorization; -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; + import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -20,7 +22,8 @@ public DefaultResourceSpecResolver(Authentication systemAuthentication, EntityCl _resourceFieldResolverProviders = ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), new DomainFieldResolverProvider(entityClient, systemAuthentication), - new OwnerFieldResolverProvider(entityClient, systemAuthentication)); + new OwnerFieldResolverProvider(entityClient, systemAuthentication), + new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication)); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java new file mode 100644 index 0000000000000..cd838625c2ca1 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -0,0 +1,70 @@ +package 
com.datahub.authorization.fieldresolverprovider;
+
+import com.datahub.authentication.Authentication;
+import com.datahub.authorization.FieldResolver;
+import com.datahub.authorization.ResourceFieldType;
+import com.datahub.authorization.ResourceSpec;
+import com.linkedin.common.DataPlatformInstance;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.entity.client.EntityClient;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.Collections;
+import java.util.Objects;
+
+import static com.linkedin.metadata.Constants.*;
+
+/**
+ * Provides field resolver for data platform instance given resourceSpec
+ */
+@Slf4j
+@RequiredArgsConstructor
+public class DataPlatformInstanceFieldResolverProvider implements ResourceFieldResolverProvider {
+
+  private final EntityClient _entityClient;
+  private final Authentication _systemAuthentication;
+
+  @Override
+  public ResourceFieldType getFieldType() {
+    return ResourceFieldType.DATA_PLATFORM_INSTANCE;
+  }
+
+  @Override
+  public FieldResolver getFieldResolver(ResourceSpec resourceSpec) {
+    return FieldResolver.getResolverFromFunction(resourceSpec, this::getDataPlatformInstance);
+  }
+
+  private FieldResolver.FieldValue getDataPlatformInstance(ResourceSpec resourceSpec) {
+    Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource());
+    // In the case that the entity is a platform instance, the associated platform instance entity is the instance itself
+    if (entityUrn.getEntityType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME)) {
+      return FieldResolver.FieldValue.builder()
+          .values(Collections.singleton(entityUrn.toString()))
+          .build();
+    }
+
+    EnvelopedAspect dataPlatformInstanceAspect;
+    try {
+      EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn,
+          Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME), _systemAuthentication);
+      if (response == null || !response.getAspects().containsKey(DATA_PLATFORM_INSTANCE_ASPECT_NAME)) {
+        return FieldResolver.emptyFieldValue();
+      }
+      dataPlatformInstanceAspect = response.getAspects().get(DATA_PLATFORM_INSTANCE_ASPECT_NAME);
+    } catch (Exception e) {
+      log.error("Error while retrieving platform instance aspect for urn {}", entityUrn, e);
+      return FieldResolver.emptyFieldValue();
+    }
+    DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataPlatformInstanceAspect.getValue().data());
+    if (dataPlatformInstance.getInstance() == null) {
+      return FieldResolver.emptyFieldValue();
+    }
+    return FieldResolver.FieldValue.builder()
+        .values(Collections.singleton(Objects.requireNonNull(dataPlatformInstance.getInstance()).toString()))
+        .build();
+  }
+}
\ No newline at end of file
diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java
new file mode 100644
index 0000000000000..e525c602c2620
--- /dev/null
+++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java
@@ -0,0 +1,188 @@
+package com.datahub.authorization.fieldresolverprovider;
+
+import com.datahub.authentication.Authentication;
+import com.datahub.authorization.ResourceFieldType;
+import com.datahub.authorization.ResourceSpec;
+import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.Set; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class DataPlatformInstanceFieldResolverProviderTest { + + private static final String DATA_PLATFORM_INSTANCE_URN = + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)"; + private static final String RESOURCE_URN = + "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.testDataset,PROD)"; + private static final ResourceSpec RESOURCE_SPEC = new ResourceSpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private DataPlatformInstanceFieldResolverProvider dataPlatformInstanceFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + dataPlatformInstanceFieldResolverProvider = + new DataPlatformInstanceFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnDataPlatformInstanceType() { + assertEquals(ResourceFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + } + + @Test + public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() { + var resourceSpec = new ResourceSpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(resourceSpec); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verifyZeroInteractions(entityClientMock); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceHasNoDataPlatformInstance() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + 
var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenDataPlatformInstanceHasNoInstance() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatform = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatform.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithDataPlatformInstanceOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var dataPlatformInstance = new DataPlatformInstance() + .setPlatform(Urn.createFromString("urn:li:dataPlatform:s3")) + .setInstance(Urn.createFromString(DATA_PLATFORM_INSTANCE_URN)); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(dataPlatformInstance.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(DATA_PLATFORM_INSTANCE_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(Collections.singleton(DATA_PLATFORM_INSTANCE_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} From a17db676e37d90ec47f16a43ab95e0d562952939 Mon Sep 17 00:00:00 2001 From: siladitya 
<68184387+siladitya2@users.noreply.github.com> Date: Wed, 11 Oct 2023 02:43:36 +0200 Subject: [PATCH 111/156] feat(graphql): Added datafetcher for DataPlatformInstance entity (#8935) Co-authored-by: si-chakraborty Co-authored-by: John Joyce --- .../datahub/graphql/GmsGraphQLEngine.java | 1 + .../DataPlatformInstanceType.java | 34 ++++++++++++++++++- .../src/main/resources/entity.graphql | 5 +++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3ba0cc1f747e3..ebb5c7d62c7d3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -821,6 +821,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) .dataFetcher("dataPlatform", getResolver(dataPlatformType)) + .dataFetcher("dataPlatformInstance", getResolver(dataPlatformInstanceType)) .dataFetcher("mlFeatureTable", getResolver(mlFeatureTableType)) .dataFetcher("mlFeature", getResolver(mlFeatureType)) .dataFetcher("mlPrimaryKey", getResolver(mlPrimaryKeyType)) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java index 2423fc31ea52e..87614e1332528 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatforminstance/DataPlatformInstanceType.java @@ -4,16 +4,25 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.types.dataplatforminstance.mappers.DataPlatformInstanceMapper; +import com.linkedin.datahub.graphql.types.mappers.AutoCompleteResultsMapper; +import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.filter.Filter; import graphql.execution.DataFetcherResult; +import org.apache.commons.lang3.NotImplementedException; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -22,7 +31,10 @@ import java.util.function.Function; import java.util.stream.Collectors; -public class DataPlatformInstanceType implements com.linkedin.datahub.graphql.types.EntityType { +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + +public class DataPlatformInstanceType implements SearchableEntityType, + 
com.linkedin.datahub.graphql.types.EntityType { static final Set ASPECTS_TO_FETCH = ImmutableSet.of( Constants.DATA_PLATFORM_INSTANCE_KEY_ASPECT_NAME, @@ -84,4 +96,24 @@ public List> batchLoad(@Nonnull List filters, + int start, + int count, + @Nonnull final QueryContext context) throws Exception { + throw new NotImplementedException("Searchable type (deprecated) not implemented on DataPlatformInstance entity type"); + } + + @Override + public AutoCompleteResults autoComplete(@Nonnull String query, + @Nullable String field, + @Nullable Filter filters, + int limit, + @Nonnull final QueryContext context) throws Exception { + final AutoCompleteResult result = _entityClient.autoComplete(DATA_PLATFORM_INSTANCE_ENTITY_NAME, query, + filters, limit, context.getAuthentication()); + return AutoCompleteResultsMapper.map(result); + } + } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 39f86948c77c4..0b15d7b875a9c 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -226,6 +226,11 @@ type Query { listOwnershipTypes( "Input required for listing custom ownership types" input: ListOwnershipTypesInput!): ListOwnershipTypesResult! + + """ + Fetch a Data Platform Instance by primary key (urn) + """ + dataPlatformInstance(urn: String!): DataPlatformInstance } """ From dfcea2441e75e1eef517c0f9a4765e6e7990f297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Wed, 11 Oct 2023 03:04:44 +0200 Subject: [PATCH 112/156] feat(config): configurable bootstrap policies file (#8812) Co-authored-by: John Joyce --- .../configuration/src/main/resources/application.yml | 4 ++++ .../boot/factories/BootstrapManagerFactory.java | 7 ++++++- .../metadata/boot/steps/IngestPoliciesStep.java | 10 +++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 4dfd96ac75c6c..d22f92adca8f9 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -276,6 +276,10 @@ bootstrap: enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones backfillBrowsePathsV2: enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. 
Deprecating in favor of running through SystemUpdate + policies: + file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json} + # eg for local file + # file: "file:///datahub/datahub-gms/resources/custom-policies.json" servlets: waitTimeout: ${BOOTSTRAP_SERVLETS_WAITTIMEOUT:60} # Total waiting time in seconds for servlets to initialize diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index c490f00021201..3a761bd12647e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -31,6 +31,7 @@ import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; + import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -41,6 +42,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; +import org.springframework.core.io.Resource; @Configuration @@ -89,13 +91,16 @@ public class BootstrapManagerFactory { @Value("${bootstrap.backfillBrowsePathsV2.enabled}") private Boolean _backfillBrowsePathsV2Enabled; + @Value("${bootstrap.policies.file}") + private Resource _policiesResource; + @Bean(name = "bootstrapManager") @Scope("singleton") @Nonnull protected BootstrapManager createInstance() { final IngestRootUserStep ingestRootUserStep = new IngestRootUserStep(_entityService); final IngestPoliciesStep ingestPoliciesStep = - new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer); + new IngestPoliciesStep(_entityRegistry, _entityService, _entitySearchService, _searchDocumentTransformer, _policiesResource); final IngestRolesStep ingestRolesStep = new IngestRolesStep(_entityService, _entityRegistry); final IngestDataPlatformsStep ingestDataPlatformsStep = new IngestDataPlatformsStep(_entityService); final IngestDataPlatformInstancesStep ingestDataPlatformInstancesStep = diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index 87dcfd736da40..cf29645214466 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -25,6 +25,7 @@ import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.policy.DataHubPolicyInfo; + import java.io.IOException; import java.net.URISyntaxException; import java.util.Collections; @@ -35,7 +36,8 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + import static com.linkedin.metadata.Constants.*; @@ -52,6 +54,8 @@ public class IngestPoliciesStep implements BootstrapStep { private final EntitySearchService _entitySearchService; private final SearchDocumentTransformer _searchDocumentTransformer; + private final Resource _policiesResource; + 
@Override public String name() { return "IngestPoliciesStep"; @@ -66,10 +70,10 @@ public void execute() throws IOException, URISyntaxException { .maxStringLength(maxSize).build()); // 0. Execute preflight check to see whether we need to ingest policies - log.info("Ingesting default access policies..."); + log.info("Ingesting default access policies from: {}...", _policiesResource); // 1. Read from the file into JSON. - final JsonNode policiesObj = mapper.readTree(new ClassPathResource("./boot/policies.json").getFile()); + final JsonNode policiesObj = mapper.readTree(_policiesResource.getFile()); if (!policiesObj.isArray()) { throw new RuntimeException( From 10a190470e8c932b6d34cba49de7dbcba687a088 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 11 Oct 2023 08:54:08 +0530 Subject: [PATCH 113/156] feat(ingestion/redshift): CLL support in redshift (#8921) --- .../ingestion/source/redshift/config.py | 4 + .../ingestion/source/redshift/lineage.py | 215 +++++++++++++----- .../ingestion/source/redshift/redshift.py | 1 + .../tests/unit/test_redshift_lineage.py | 95 ++++++-- 4 files changed, 234 insertions(+), 81 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 804a14b0fe1cf..2789b800940db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -132,6 +132,10 @@ class RedshiftConfig( description="Whether `schema_pattern` is matched against fully qualified schema name `.`.", ) + extract_column_level_lineage: bool = Field( + default=True, description="Whether to extract column level lineage." + ) + @root_validator(pre=True) def check_email_is_set_on_usage(cls, values): if values.get("include_usage_statistics"): diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index bbe52b5d98ba3..c9ddfbe92ab2a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -9,10 +9,12 @@ import humanfriendly import redshift_connector -from sqllineage.runner import LineageRunner +import datahub.emitter.mce_builder as builder +import datahub.utilities.sqlglot_lineage as sqlglot_l from datahub.emitter import mce_builder from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import strip_s3_prefix from datahub.ingestion.source.redshift.common import get_db_name from datahub.ingestion.source.redshift.config import LineageMode, RedshiftConfig @@ -28,13 +30,19 @@ from datahub.ingestion.source.state.redundant_run_skip_handler import ( RedundantLineageRunSkipHandler, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, + UpstreamLineage, +) from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, UpstreamClass, UpstreamLineageClass, ) from datahub.utilities import memory_footprint +from datahub.utilities.urns import dataset_urn logger: logging.Logger = logging.getLogger(__name__) @@ -56,13 +64,14 @@ class LineageCollectorType(Enum): @dataclass(frozen=True, eq=True) class 
LineageDataset: platform: LineageDatasetPlatform - path: str + urn: str @dataclass() class LineageItem: dataset: LineageDataset upstreams: Set[LineageDataset] + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] collector_type: LineageCollectorType dataset_lineage_type: str = field(init=False) @@ -83,10 +92,12 @@ def __init__( self, config: RedshiftConfig, report: RedshiftReport, + context: PipelineContext, redundant_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = None, ): self.config = config self.report = report + self.context = context self._lineage_map: Dict[str, LineageItem] = defaultdict() self.redundant_run_skip_handler = redundant_run_skip_handler @@ -121,33 +132,37 @@ def _get_s3_path(self, path: str) -> str: return path - def _get_sources_from_query(self, db_name: str, query: str) -> List[LineageDataset]: + def _get_sources_from_query( + self, db_name: str, query: str + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() - parser = LineageRunner(query) + parsed_result: Optional[ + sqlglot_l.SqlParsingResult + ] = sqlglot_l.create_lineage_sql_parsed_result( + query=query, + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + database=db_name, + schema=str(self.config.default_schema), + graph=self.context.graph, + env=self.config.env, + ) - for table in parser.source_tables: - split = str(table).split(".") - if len(split) == 3: - db_name, source_schema, source_table = split - elif len(split) == 2: - source_schema, source_table = split - else: - raise ValueError( - f"Invalid table name {table} in query {query}. " - f"Expected format: [db_name].[schema].[table] or [schema].[table] or [table]." - ) + if parsed_result is None: + logger.debug(f"native query parsing failed for {query}") + return sources, None - if source_schema == "": - source_schema = str(self.config.default_schema) + logger.debug(f"parsed_result = {parsed_result}") + for table_urn in parsed_result.in_tables: source = LineageDataset( platform=LineageDatasetPlatform.REDSHIFT, - path=f"{db_name}.{source_schema}.{source_table}", + urn=table_urn, ) sources.append(source) - return sources + return sources, parsed_result.column_lineage def _build_s3_path_from_row(self, filename: str) -> str: path = filename.strip() @@ -165,9 +180,11 @@ def _get_sources( source_table: Optional[str], ddl: Optional[str], filename: Optional[str], - ) -> List[LineageDataset]: + ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() # Source + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None + if ( lineage_type in { @@ -177,7 +194,7 @@ def _get_sources( and ddl is not None ): try: - sources = self._get_sources_from_query(db_name=db_name, query=ddl) + sources, cll = self._get_sources_from_query(db_name=db_name, query=ddl) except Exception as e: logger.warning( f"Error parsing query {ddl} for getting lineage. Error was {e}." @@ -192,22 +209,38 @@ def _get_sources( "Only s3 source supported with copy. The source was: {path}." 
) self.report.num_lineage_dropped_not_support_copy_path += 1 - return sources + return sources, cll path = strip_s3_prefix(self._get_s3_path(path)) + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + name=path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) elif source_schema is not None and source_table is not None: platform = LineageDatasetPlatform.REDSHIFT path = f"{db_name}.{source_schema}.{source_table}" + urn = make_dataset_urn_with_platform_instance( + platform=platform.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ) else: - return [] + return [], cll sources = [ LineageDataset( platform=platform, - path=path, + urn=urn, ) ] - return sources + return sources, cll def _populate_lineage_map( self, @@ -231,6 +264,7 @@ def _populate_lineage_map( :rtype: None """ try: + cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None raw_db_name = database alias_db_name = get_db_name(self.config) @@ -243,7 +277,7 @@ def _populate_lineage_map( if not target: continue - sources = self._get_sources( + sources, cll = self._get_sources( lineage_type, alias_db_name, source_schema=lineage_row.source_schema, @@ -251,6 +285,7 @@ def _populate_lineage_map( ddl=lineage_row.ddl, filename=lineage_row.filename, ) + target.cll = cll target.upstreams.update( self._get_upstream_lineages( @@ -262,20 +297,16 @@ def _populate_lineage_map( ) # Merging downstreams if dataset already exists and has downstreams - if target.dataset.path in self._lineage_map: - self._lineage_map[ - target.dataset.path - ].upstreams = self._lineage_map[ - target.dataset.path - ].upstreams.union( - target.upstreams - ) + if target.dataset.urn in self._lineage_map: + self._lineage_map[target.dataset.urn].upstreams = self._lineage_map[ + target.dataset.urn + ].upstreams.union(target.upstreams) else: - self._lineage_map[target.dataset.path] = target + self._lineage_map[target.dataset.urn] = target logger.debug( - f"Lineage[{target}]:{self._lineage_map[target.dataset.path]}" + f"Lineage[{target}]:{self._lineage_map[target.dataset.urn]}" ) except Exception as e: self.warn( @@ -308,17 +339,34 @@ def _get_target_lineage( target_platform = LineageDatasetPlatform.S3 # Following call requires 'filename' key in lineage_row target_path = self._build_s3_path_from_row(lineage_row.filename) + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + name=target_path, + env=self.config.env, + platform_instance=self.config.platform_instance_map.get( + target_platform.value + ) + if self.config.platform_instance_map is not None + else None, + ) except ValueError as e: self.warn(logger, "non-s3-lineage", str(e)) return None else: target_platform = LineageDatasetPlatform.REDSHIFT target_path = f"{alias_db_name}.{lineage_row.target_schema}.{lineage_row.target_table}" + urn = make_dataset_urn_with_platform_instance( + platform=target_platform.value, + platform_instance=self.config.platform_instance, + name=target_path, + env=self.config.env, + ) return LineageItem( - dataset=LineageDataset(platform=target_platform, path=target_path), + dataset=LineageDataset(platform=target_platform, urn=urn), upstreams=set(), collector_type=lineage_type, + cll=None, ) def _get_upstream_lineages( @@ -331,11 +379,22 @@ def _get_upstream_lineages( targe_source = [] for source in sources: if source.platform == LineageDatasetPlatform.REDSHIFT: - db, schema, 
table = source.path.split(".") + qualified_table_name = dataset_urn.DatasetUrn.create_from_string( + source.urn + ).get_entity_id()[1] + db, schema, table = qualified_table_name.split(".") if db == raw_db_name: db = alias_db_name path = f"{db}.{schema}.{table}" - source = LineageDataset(platform=source.platform, path=path) + source = LineageDataset( + platform=source.platform, + urn=make_dataset_urn_with_platform_instance( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + name=path, + env=self.config.env, + ), + ) # Filtering out tables which does not exist in Redshift # It was deleted in the meantime or query parser did not capture well the table name @@ -345,7 +404,7 @@ def _get_upstream_lineages( or not any(table == t.name for t in all_tables[db][schema]) ): logger.debug( - f"{source.path} missing table, dropping from lineage.", + f"{source.urn} missing table, dropping from lineage.", ) self.report.num_lineage_tables_dropped += 1 continue @@ -433,36 +492,73 @@ def populate_lineage( memory_footprint.total_size(self._lineage_map) ) + def make_fine_grained_lineage_class( + self, lineage_item: LineageItem, dataset_urn: str + ) -> List[FineGrainedLineage]: + fine_grained_lineages: List[FineGrainedLineage] = [] + + if ( + self.config.extract_column_level_lineage is False + or lineage_item.cll is None + ): + logger.debug("CLL extraction is disabled") + return fine_grained_lineages + + logger.debug("Extracting column level lineage") + + cll: List[sqlglot_l.ColumnLineageInfo] = lineage_item.cll + + for cll_info in cll: + downstream = ( + [builder.make_schema_field_urn(dataset_urn, cll_info.downstream.column)] + if cll_info.downstream is not None + and cll_info.downstream.column is not None + else [] + ) + + upstreams = [ + builder.make_schema_field_urn(column_ref.table, column_ref.column) + for column_ref in cll_info.upstreams + ] + + fine_grained_lineages.append( + FineGrainedLineage( + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=downstream, + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=upstreams, + ) + ) + + logger.debug(f"Created fine_grained_lineage for {dataset_urn}") + + return fine_grained_lineages + def get_lineage( self, table: Union[RedshiftTable, RedshiftView], dataset_urn: str, schema: RedshiftSchema, ) -> Optional[Tuple[UpstreamLineageClass, Dict[str, str]]]: - dataset_key = mce_builder.dataset_urn_to_key(dataset_urn) - if dataset_key is None: - return None upstream_lineage: List[UpstreamClass] = [] - if dataset_key.name in self._lineage_map: - item = self._lineage_map[dataset_key.name] + cll_lineage: List[FineGrainedLineage] = [] + + if dataset_urn in self._lineage_map: + item = self._lineage_map[dataset_urn] for upstream in item.upstreams: upstream_table = UpstreamClass( - dataset=make_dataset_urn_with_platform_instance( - upstream.platform.value, - upstream.path, - platform_instance=self.config.platform_instance_map.get( - upstream.platform.value - ) - if self.config.platform_instance_map - else None, - env=self.config.env, - ), + dataset=upstream.urn, type=item.dataset_lineage_type, ) upstream_lineage.append(upstream_table) + cll_lineage = self.make_fine_grained_lineage_class( + lineage_item=item, + dataset_urn=dataset_urn, + ) + tablename = table.name if table.type == "EXTERNAL_TABLE": # external_db_params = schema.option @@ -489,7 +585,12 @@ def get_lineage( else: return None - return UpstreamLineage(upstreams=upstream_lineage), {} + return ( + UpstreamLineage( + 
upstreams=upstream_lineage, fineGrainedLineages=cll_lineage or None + ), + {}, + ) def report_status(self, step: str, status: bool) -> None: if self.redundant_run_skip_handler: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index e8a8ff976afa6..a1b6333a3775d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -881,6 +881,7 @@ def extract_lineage( self.lineage_extractor = RedshiftLineageExtractor( config=self.config, report=self.report, + context=self.ctx, redundant_run_skip_handler=self.redundant_lineage_run_skip_handler, ) diff --git a/metadata-ingestion/tests/unit/test_redshift_lineage.py b/metadata-ingestion/tests/unit/test_redshift_lineage.py index c7d6ac18e044c..db5af3a71efb9 100644 --- a/metadata-ingestion/tests/unit/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/test_redshift_lineage.py @@ -1,6 +1,8 @@ +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor from datahub.ingestion.source.redshift.report import RedshiftReport +from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef def test_get_sources_from_query(): @@ -10,14 +12,20 @@ def test_get_sources_from_query(): test_query = """ select * from my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table_name(): @@ -27,14 +35,20 @@ def test_get_sources_from_query_with_only_table_name(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) def test_get_sources_from_query_with_database(): @@ -44,14 +58,20 @@ def test_get_sources_from_query_with_database(): test_query = """ select * from test.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.my_schema.my_table" + + assert ( + lineage.urn + == 
"urn:li:dataset:(urn:li:dataPlatform:redshift,test.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_non_default_database(): @@ -61,14 +81,20 @@ def test_get_sources_from_query_with_non_default_database(): test_query = """ select * from test2.my_schema.my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test2.my_schema.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test2.my_schema.my_table,PROD)" + ) def test_get_sources_from_query_with_only_table(): @@ -78,27 +104,48 @@ def test_get_sources_from_query_with_only_table(): test_query = """ select * from my_table """ - lineage_extractor = RedshiftLineageExtractor(config, report) - lineage_datasets = lineage_extractor._get_sources_from_query( + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + lineage_datasets, _ = lineage_extractor._get_sources_from_query( db_name="test", query=test_query ) assert len(lineage_datasets) == 1 lineage = lineage_datasets[0] - assert lineage.path == "test.public.my_table" + + assert ( + lineage.urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.my_table,PROD)" + ) -def test_get_sources_from_query_with_four_part_table_should_throw_exception(): +def test_cll(): config = RedshiftConfig(host_port="localhost:5439", database="test") report = RedshiftReport() test_query = """ - select * from database.schema.my_table.test + select a,b,c from db.public.customer inner join db.public.order on db.public.customer.id = db.public.order.customer_id """ - lineage_extractor = RedshiftLineageExtractor(config, report) - try: - lineage_extractor._get_sources_from_query(db_name="test", query=test_query) - except ValueError: - pass - - assert f"{test_query} should have thrown a ValueError exception but it didn't" + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo") + ) + _, cll = lineage_extractor._get_sources_from_query(db_name="db", query=test_query) + + assert cll == [ + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="a"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="b"), + upstreams=[], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef(table=None, column="c"), + upstreams=[], + logic=None, + ), + ] From 4b6b941a2abf13854511c9af0e88a17d5acfd5e6 Mon Sep 17 00:00:00 2001 From: Harsha Mandadi <115464537+harsha-mandadi-4026@users.noreply.github.com> Date: Wed, 11 Oct 2023 19:01:46 +0100 Subject: [PATCH 114/156] fix(ingest): Fix postgres lineage within views (#8906) Co-authored-by: Harshal Sheth Co-authored-by: Maggie Hays --- .../datahub/ingestion/source/sql/postgres.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index ba8655b83446d..a6a9d8e2c8597 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -217,14 +217,15 @@ def 
_get_view_lineage_elements( key = (lineage.dependent_view, lineage.dependent_schema) # Append the source table to the list. lineage_elements[key].append( - mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( + mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_identifier( schema=lineage.source_schema, entity=lineage.source_table, inspector=inspector, ), - self.config.env, + platform_instance=self.config.platform_instance, + env=self.config.env, ) ) @@ -244,12 +245,13 @@ def _get_view_lineage_workunits( dependent_view, dependent_schema = key # Construct a lineage object. - urn = mce_builder.make_dataset_urn( - self.platform, - self.get_identifier( + urn = mce_builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_identifier( schema=dependent_schema, entity=dependent_view, inspector=inspector ), - self.config.env, + platform_instance=self.config.platform_instance, + env=self.config.env, ) # use the mce_builder to ensure that the change proposal inherits From 932fbcddbf7c3201898e0918218e80c9246b0cd2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 11 Oct 2023 14:17:02 -0400 Subject: [PATCH 115/156] refactor(ingest/dbt): move dbt tests logic to dedicated file (#8984) --- .../src/datahub/ingestion/api/common.py | 9 + .../datahub/ingestion/source/csv_enricher.py | 8 +- .../datahub/ingestion/source/dbt/dbt_cloud.py | 3 +- .../ingestion/source/dbt/dbt_common.py | 278 +----------------- .../datahub/ingestion/source/dbt/dbt_core.py | 3 +- .../datahub/ingestion/source/dbt/dbt_tests.py | 261 ++++++++++++++++ 6 files changed, 288 insertions(+), 274 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index 778bd119615e2..a6761a3c77d5e 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Generic, Iterable, Optional, Tuple, TypeVar +from datahub.configuration.common import ConfigurationError from datahub.emitter.mce_builder import set_dataset_urn_to_lower from datahub.ingestion.api.committable import Committable from datahub.ingestion.graph.client import DataHubGraph @@ -75,3 +76,11 @@ def register_checkpointer(self, committable: Committable) -> None: def get_committables(self) -> Iterable[Tuple[str, Committable]]: yield from self.checkpointers.items() + + def require_graph(self, operation: Optional[str] = None) -> DataHubGraph: + if not self.graph: + raise ConfigurationError( + f"{operation or 'This operation'} requires a graph, but none was provided. " + "To provide one, either use the datahub-rest sink or set the top-level datahub_api config in the recipe." + ) + return self.graph diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 7cb487a86d931..611f0c5c52cc6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -129,11 +129,9 @@ def __init__(self, config: CSVEnricherConfig, ctx: PipelineContext): # Map from entity urn to a list of SubResourceRow. 
self.editable_schema_metadata_map: Dict[str, List[SubResourceRow]] = {} self.should_overwrite: bool = self.config.write_semantics == "OVERRIDE" - if not self.should_overwrite and not self.ctx.graph: - raise ConfigurationError( - "With PATCH semantics, the csv-enricher source requires a datahub_api to connect to. " - "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe." - ) + + if not self.should_overwrite: + self.ctx.require_graph(operation="The csv-enricher's PATCH semantics flag") def get_resource_glossary_terms_work_unit( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index af9769bc9d94c..da1ea8ecb4678 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -20,9 +20,8 @@ DBTCommonConfig, DBTNode, DBTSourceBase, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0f5c08eb6ac54..48d2118a9b091 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1,11 +1,10 @@ -import json import logging import re from abc import abstractmethod from dataclasses import dataclass, field from datetime import datetime from enum import auto -from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple import pydantic from pydantic import root_validator, validator @@ -34,6 +33,12 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import DatasetSubTypes +from datahub.ingestion.source.dbt.dbt_tests import ( + DBTTest, + DBTTestResult, + make_assertion_from_test, + make_assertion_result_from_test, +) from datahub.ingestion.source.sql.sql_types import ( ATHENA_SQL_TYPES_MAP, BIGQUERY_TYPES_MAP, @@ -81,20 +86,7 @@ TimeTypeClass, ) from datahub.metadata.schema_classes import ( - AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdAggregationClass, - AssertionStdOperatorClass, - AssertionStdParameterClass, - AssertionStdParametersClass, - AssertionStdParameterTypeClass, - AssertionTypeClass, DataPlatformInstanceClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, DatasetPropertiesClass, GlobalTagsClass, GlossaryTermsClass, @@ -551,134 +543,6 @@ def get_column_type( return SchemaFieldDataType(type=TypeClass()) -@dataclass -class AssertionParams: - scope: Union[DatasetAssertionScopeClass, str] - operator: Union[AssertionStdOperatorClass, str] - aggregation: Union[AssertionStdAggregationClass, str] - parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None - logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None - - -def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: - """ - Try to produce a useful string for the name of a relationship constraint. 
- Return None if we fail to - """ - destination_ref = kw_args.get("to") - source_ref = kw_args.get("model") - column_name = kw_args.get("column_name") - dest_field_name = kw_args.get("field") - if not destination_ref or not source_ref or not column_name or not dest_field_name: - # base assertions are violated, bail early - return None - m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) - if m: - destination_table = m.group(1) - else: - destination_table = destination_ref - m = re.search(r"ref\(\'(.*)\'\)", source_ref) - if m: - source_table = m.group(1) - else: - source_table = source_ref - return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" - - -@dataclass -class DBTTest: - qualified_test_name: str - column_name: Optional[str] - kw_args: dict - - TEST_NAME_TO_ASSERTION_MAP: ClassVar[Dict[str, AssertionParams]] = { - "not_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "unique": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.EQUAL_TO, - aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, - parameters=lambda _: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value="1.0", - type=AssertionStdParameterTypeClass.NUMBER, - ) - ), - ), - "accepted_values": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - "relationships": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("values")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - logic_fn=_get_name_for_relationship_test, - ), - "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.NOT_NULL, - aggregation=AssertionStdAggregationClass.IDENTITY, - ), - "dbt_expectations.expect_column_values_to_be_between": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.BETWEEN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda x: AssertionStdParametersClass( - minValue=AssertionStdParameterClass( - value=str(x.get("min_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - maxValue=AssertionStdParameterClass( - value=str(x.get("max_value", "unknown")), - type=AssertionStdParameterTypeClass.NUMBER, - ), - ), - ), - "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass.IN, - aggregation=AssertionStdAggregationClass.IDENTITY, - parameters=lambda kw_args: AssertionStdParametersClass( - value=AssertionStdParameterClass( - value=json.dumps(kw_args.get("value_set")), - type=AssertionStdParameterTypeClass.SET, - ), - ), - ), - } - - -@dataclass -class DBTTestResult: - invocation_id: str - - status: str - execution_time: datetime - - native_results: 
Dict[str, str] - - -def string_map(input_map: Dict[str, Any]) -> Dict[str, str]: - return {k: str(v) for k, v in input_map.items()} - - @platform_name("dbt") @config_class(DBTCommonConfig) @support_status(SupportStatus.CERTIFIED) @@ -750,7 +614,7 @@ def create_test_entity_mcps( for upstream_urn in sorted(upstream_urns): if self.config.entities_enabled.can_emit_node_type("test"): - yield self._make_assertion_from_test( + yield make_assertion_from_test( custom_props, node, assertion_urn, @@ -759,133 +623,17 @@ def create_test_entity_mcps( if node.test_result: if self.config.entities_enabled.can_emit_test_results: - yield self._make_assertion_result_from_test( - node, assertion_urn, upstream_urn + yield make_assertion_result_from_test( + node, + assertion_urn, + upstream_urn, + test_warnings_are_errors=self.config.test_warnings_are_errors, ) else: logger.debug( f"Skipping test result {node.name} emission since it is turned off." ) - def _make_assertion_from_test( - self, - extra_custom_props: Dict[str, str], - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert node.test_info - qualified_test_name = node.test_info.qualified_test_name - column_name = node.test_info.column_name - kw_args = node.test_info.kw_args - - if qualified_test_name in DBTTest.TEST_NAME_TO_ASSERTION_MAP: - assertion_params = DBTTest.TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=assertion_params.scope, - operator=assertion_params.operator, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ] - if ( - assertion_params.scope - == DatasetAssertionScopeClass.DATASET_COLUMN - and column_name - ) - else [], - nativeType=node.name, - aggregation=assertion_params.aggregation, - parameters=assertion_params.parameters(kw_args) - if assertion_params.parameters - else None, - logic=assertion_params.logic_fn(kw_args) - if assertion_params.logic_fn - else None, - nativeParameters=string_map(kw_args), - ), - ) - elif column_name: - # no match with known test types, column-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=DatasetAssertionScopeClass.DATASET_COLUMN, - operator=AssertionStdOperatorClass._NATIVE_, - fields=[ - mce_builder.make_schema_field_urn(upstream_urn, column_name) - ], - nativeType=node.name, - logic=node.compiled_code or node.raw_code, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - else: - # no match with known test types, default to row-level test - assertion_info = AssertionInfoClass( - type=AssertionTypeClass.DATASET, - customProperties=extra_custom_props, - datasetAssertion=DatasetAssertionInfoClass( - dataset=upstream_urn, - scope=DatasetAssertionScopeClass.DATASET_ROWS, - operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_code or node.raw_code, - nativeType=node.name, - aggregation=AssertionStdAggregationClass._NATIVE_, - nativeParameters=string_map(kw_args), - ), - ) - - wu = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertion_info, - ).as_workunit() - - return wu - - def _make_assertion_result_from_test( - self, - node: DBTNode, - assertion_urn: str, - upstream_urn: str, - ) -> MetadataWorkUnit: - assert 
node.test_result - test_result = node.test_result - - assertionResult = AssertionRunEventClass( - timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), - assertionUrn=assertion_urn, - asserteeUrn=upstream_urn, - runId=test_result.invocation_id, - result=AssertionResultClass( - type=AssertionResultTypeClass.SUCCESS - if test_result.status == "pass" - or ( - not self.config.test_warnings_are_errors - and test_result.status == "warn" - ) - else AssertionResultTypeClass.FAILURE, - nativeResults=test_result.native_results, - ), - status=AssertionRunStatusClass.COMPLETE, - ) - - event = MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=assertionResult, - ) - wu = MetadataWorkUnit( - id=f"{assertion_urn}-assertionRunEvent-{upstream_urn}", - mcp=event, - ) - return wu - @abstractmethod def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: # return dbt nodes + global custom properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index c08295ed1dc59..dc3a84847beb2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -26,9 +26,8 @@ DBTNode, DBTSourceBase, DBTSourceReport, - DBTTest, - DBTTestResult, ) +from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py new file mode 100644 index 0000000000000..721769d214d9e --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py @@ -0,0 +1,261 @@ +import json +import re +from dataclasses import dataclass +from datetime import datetime +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union + +from datahub.emitter import mce_builder +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdAggregationClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, +) + +if TYPE_CHECKING: + from datahub.ingestion.source.dbt.dbt_common import DBTNode + + +@dataclass +class DBTTest: + qualified_test_name: str + column_name: Optional[str] + kw_args: dict + + +@dataclass +class DBTTestResult: + invocation_id: str + + status: str + execution_time: datetime + + native_results: Dict[str, str] + + +def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]: + """ + Try to produce a useful string for the name of a relationship constraint. 
+ Return None if we fail to + """ + destination_ref = kw_args.get("to") + source_ref = kw_args.get("model") + column_name = kw_args.get("column_name") + dest_field_name = kw_args.get("field") + if not destination_ref or not source_ref or not column_name or not dest_field_name: + # base assertions are violated, bail early + return None + m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref) + if m: + destination_table = m.group(1) + else: + destination_table = destination_ref + m = re.search(r"ref\(\'(.*)\'\)", source_ref) + if m: + source_table = m.group(1) + else: + source_table = source_ref + return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}" + + +@dataclass +class AssertionParams: + scope: Union[DatasetAssertionScopeClass, str] + operator: Union[AssertionStdOperatorClass, str] + aggregation: Union[AssertionStdAggregationClass, str] + parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None + logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None + + +_DBT_TEST_NAME_TO_ASSERTION_MAP: Dict[str, AssertionParams] = { + "not_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "unique": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.EQUAL_TO, + aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION, + parameters=lambda _: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value="1.0", + type=AssertionStdParameterTypeClass.NUMBER, + ) + ), + ), + "accepted_values": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), + "relationships": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("values")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + logic_fn=_get_name_for_relationship_test, + ), + "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.NOT_NULL, + aggregation=AssertionStdAggregationClass.IDENTITY, + ), + "dbt_expectations.expect_column_values_to_be_between": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.BETWEEN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda x: AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value=str(x.get("min_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value=str(x.get("max_value", "unknown")), + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams( + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass.IN, + aggregation=AssertionStdAggregationClass.IDENTITY, + parameters=lambda kw_args: AssertionStdParametersClass( + 
value=AssertionStdParameterClass( + value=json.dumps(kw_args.get("value_set")), + type=AssertionStdParameterTypeClass.SET, + ), + ), + ), +} + + +def _string_map(input_map: Dict[str, Any]) -> Dict[str, str]: + return {k: str(v) for k, v in input_map.items()} + + +def make_assertion_from_test( + extra_custom_props: Dict[str, str], + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, +) -> MetadataWorkUnit: + assert node.test_info + qualified_test_name = node.test_info.qualified_test_name + column_name = node.test_info.column_name + kw_args = node.test_info.kw_args + + if qualified_test_name in _DBT_TEST_NAME_TO_ASSERTION_MAP: + assertion_params = _DBT_TEST_NAME_TO_ASSERTION_MAP[qualified_test_name] + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=assertion_params.scope, + operator=assertion_params.operator, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)] + if ( + assertion_params.scope == DatasetAssertionScopeClass.DATASET_COLUMN + and column_name + ) + else [], + nativeType=node.name, + aggregation=assertion_params.aggregation, + parameters=assertion_params.parameters(kw_args) + if assertion_params.parameters + else None, + logic=assertion_params.logic_fn(kw_args) + if assertion_params.logic_fn + else None, + nativeParameters=_string_map(kw_args), + ), + ) + elif column_name: + # no match with known test types, column-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_COLUMN, + operator=AssertionStdOperatorClass._NATIVE_, + fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)], + nativeType=node.name, + logic=node.compiled_code or node.raw_code, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + else: + # no match with known test types, default to row-level test + assertion_info = AssertionInfoClass( + type=AssertionTypeClass.DATASET, + customProperties=extra_custom_props, + datasetAssertion=DatasetAssertionInfoClass( + dataset=upstream_urn, + scope=DatasetAssertionScopeClass.DATASET_ROWS, + operator=AssertionStdOperatorClass._NATIVE_, + logic=node.compiled_code or node.raw_code, + nativeType=node.name, + aggregation=AssertionStdAggregationClass._NATIVE_, + nativeParameters=_string_map(kw_args), + ), + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertion_info, + ).as_workunit() + + +def make_assertion_result_from_test( + node: "DBTNode", + assertion_urn: str, + upstream_urn: str, + test_warnings_are_errors: bool, +) -> MetadataWorkUnit: + assert node.test_result + test_result = node.test_result + + assertionResult = AssertionRunEventClass( + timestampMillis=int(test_result.execution_time.timestamp() * 1000.0), + assertionUrn=assertion_urn, + asserteeUrn=upstream_urn, + runId=test_result.invocation_id, + result=AssertionResultClass( + type=AssertionResultTypeClass.SUCCESS + if test_result.status == "pass" + or (not test_warnings_are_errors and test_result.status == "warn") + else AssertionResultTypeClass.FAILURE, + nativeResults=test_result.native_results, + ), + status=AssertionRunStatusClass.COMPLETE, + ) + + return MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=assertionResult, + ).as_workunit() From 
1b06c6a30c8d6c0ee57f75f75ee6a436aa6c13a7 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 12 Oct 2023 00:31:42 +0530 Subject: [PATCH 116/156] fix(ingest/snowflake): fix sample fraction for very large tables (#8988) --- .../datahub/ingestion/source/snowflake/snowflake_profiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 24275dcdff34d..8e18d85d6f3ca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -86,7 +86,7 @@ def get_batch_kwargs( # Fixed-size sampling can be slower than equivalent fraction-based sampling # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations sample_pc = 100 * self.config.profiling.sample_size / table.rows_count - custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})' + custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})' return { **super().get_batch_kwargs(table, schema_name, db_name), # Lowercase/Mixedcase table names in Snowflake do not work by default. From 245284ec6c6b754b22943ba42d7139ddd5772377 Mon Sep 17 00:00:00 2001 From: jayasimhankv <145704974+jayasimhankv@users.noreply.github.com> Date: Wed, 11 Oct 2023 17:40:20 -0500 Subject: [PATCH 117/156] fix(): Display generic not found page for corp groups that do not exist (#8880) Co-authored-by: Jay Kadambi --- .../java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java | 3 ++- datahub-graphql-core/src/main/resources/entity.graphql | 5 +++++ datahub-web-react/src/app/entity/group/GroupProfile.tsx | 4 ++++ datahub-web-react/src/graphql/group.graphql | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index ebb5c7d62c7d3..b99f712034fe0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1292,7 +1292,8 @@ private void configureCorpUserResolvers(final RuntimeWiring.Builder builder) { */ private void configureCorpGroupResolvers(final RuntimeWiring.Builder builder) { builder.type("CorpGroup", typeWiring -> typeWiring - .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))); + .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) + .dataFetcher("exists", new EntityExistsResolver(entityService))); builder.type("CorpGroupInfo", typeWiring -> typeWiring .dataFetcher("admins", new LoadableTypeBatchResolver<>(corpUserType, diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 0b15d7b875a9c..b37a8f34fa056 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3788,6 +3788,11 @@ type CorpGroup implements Entity { Additional read only info about the group """ info: CorpGroupInfo @deprecated + + """ + Whether or not this entity exists on DataHub + """ + exists: Boolean } """ diff --git 
a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx index d5e284af931df..53d2062277dec 100644 --- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx +++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx @@ -11,6 +11,7 @@ import { RoutedTabs } from '../../shared/RoutedTabs'; import GroupInfoSidebar from './GroupInfoSideBar'; import { GroupAssets } from './GroupAssets'; import { ErrorSection } from '../../shared/error/ErrorSection'; +import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage'; const messageStyle = { marginTop: '10%' }; @@ -110,6 +111,9 @@ export default function GroupProfile() { urn, }; + if (data?.corpGroup?.exists === false) { + return ; + } return ( <> {error && } diff --git a/datahub-web-react/src/graphql/group.graphql b/datahub-web-react/src/graphql/group.graphql index 9aa6e2b005f16..1007721e51a4e 100644 --- a/datahub-web-react/src/graphql/group.graphql +++ b/datahub-web-react/src/graphql/group.graphql @@ -3,6 +3,7 @@ query getGroup($urn: String!, $membersCount: Int!) { urn type name + exists origin { type externalType From 245c5c00087116d236acf7a9bbddbdb4dee15949 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 12 Oct 2023 02:06:19 +0200 Subject: [PATCH 118/156] fix(ingest/looker): stop emitting tag owner (#8942) --- docs/how/updating-datahub.md | 2 + .../ingestion/source/looker/looker_common.py | 13 +----- .../looker/golden_looker_mces.json | 42 ------------------- .../looker/golden_test_allow_ingest.json | 42 ------------------- ...olden_test_external_project_view_mces.json | 42 ------------------- .../looker/golden_test_file_path_ingest.json | 42 ------------------- .../golden_test_independent_look_ingest.json | 42 ------------------- .../looker/golden_test_ingest.json | 42 ------------------- .../looker/golden_test_ingest_joins.json | 42 ------------------- .../golden_test_ingest_unaliased_joins.json | 42 ------------------- .../looker_mces_golden_deleted_stateful.json | 42 ------------------- .../looker/looker_mces_usage_history.json | 42 ------------------- .../lookml/lookml_mces_api_bigquery.json | 42 ------------------- .../lookml/lookml_mces_api_hive2.json | 42 ------------------- .../lookml/lookml_mces_badsql_parser.json | 42 ------------------- .../lookml/lookml_mces_offline.json | 42 ------------------- .../lookml_mces_offline_deny_pattern.json | 42 ------------------- ...lookml_mces_offline_platform_instance.json | 42 ------------------- .../lookml_mces_with_external_urls.json | 42 ------------------- .../lookml/lookml_reachable_views.json | 42 ------------------- 20 files changed, 3 insertions(+), 768 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 5d0ad5eaf8f7e..9cd4ad5c6f02d 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -7,6 +7,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted + by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. 
If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. - #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 89b1e45695c57..30c38720dd96c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -81,9 +81,6 @@ EnumTypeClass, FineGrainedLineageClass, GlobalTagsClass, - OwnerClass, - OwnershipClass, - OwnershipTypeClass, SchemaMetadataClass, StatusClass, SubTypesClass, @@ -453,17 +450,9 @@ def _get_schema( @staticmethod def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent: assert tag_urn in LookerUtil.tag_definitions - ownership = OwnershipClass( - owners=[ - OwnerClass( - owner="urn:li:corpuser:datahub", - type=OwnershipTypeClass.DATAOWNER, - ) - ] - ) return MetadataChangeEvent( proposedSnapshot=TagSnapshotClass( - urn=tag_urn, aspects=[ownership, LookerUtil.tag_definitions[tag_urn]] + urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]] ) ) diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index dee85b40bb7a8..1da42b94e320c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -533,20 +533,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -566,20 +552,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -599,20 +571,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 72db36e63daf7..685a606a57c33 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", 
"aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index e5508bdb06b9e..069788cb088ac 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index b0f66e7b245c9..f1c932ebd5a70 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -335,20 +335,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } 
- }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -369,20 +355,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -403,20 +375,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 91e13debfa028..9521c9af4bbdc 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -550,20 +550,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -583,20 +569,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -616,20 +588,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index e93079119e4f4..dbacd52fe83de 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": 
"urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index a9c8efa7cdb98..aaa874d9ff348 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -351,20 +351,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -384,20 +370,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -417,20 +389,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index edd15624a14cd..be8db0722aea3 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -343,20 +343,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -376,20 +362,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -409,20 +381,6 @@ 
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index aebc89b609a08..05b74f163ad45 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -327,20 +327,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -360,20 +346,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -393,20 +365,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 34bded3cf691e..0778aa0050b00 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -279,20 +279,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -312,20 +298,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -345,20 +317,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - 
"lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 238f4c2580cdf..5a0bd4e12fd3a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 45d5d839e9d21..1b0ee3216383c 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json 
b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json index 187cedaefb6b2..b960ba581e6b5 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json @@ -2004,20 +2004,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2037,20 +2023,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2070,20 +2042,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index c2c879e38f37b..e29292a44c949 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json index c1ac54b0fb588..04ecaecbd4afb 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json 
@@ -584,20 +584,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -617,20 +603,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -650,20 +622,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index f602ca37b3160..080931ae637bc 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -2121,20 +2121,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2154,20 +2140,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2187,20 +2159,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 104bd365669e3..5826c4316b539 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -2134,20 +2134,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": 
"urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -2167,20 +2153,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -2200,20 +2172,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", diff --git a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json index 37a6c94c6952e..53d1ec0229de1 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json @@ -681,20 +681,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Dimension", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Dimension", @@ -714,20 +700,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Temporal", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Temporal", @@ -747,20 +719,6 @@ "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { "urn": "urn:li:tag:Measure", "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:datahub", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, { "com.linkedin.pegasus2avro.tag.TagProperties": { "name": "Measure", From 84bba4dc446ee97f8991689fd17bfa6d14232601 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Oct 2023 01:31:17 -0400 Subject: [PATCH 119/156] feat(ingest): add output schema inference for sql parser (#8989) --- .../src/datahub/utilities/sqlglot_lineage.py | 119 ++++++++++++++++-- .../integration/powerbi/test_m_parser.py | 93 ++++---------- .../test_bigquery_create_view_with_cte.json | 32 ++++- ..._bigquery_from_sharded_table_wildcard.json | 16 ++- .../test_bigquery_nested_subqueries.json | 16 ++- ..._bigquery_sharded_table_normalization.json | 16 ++- .../test_bigquery_star_with_replace.json | 24 +++- .../test_bigquery_view_from_union.json | 16 ++- .../goldens/test_create_view_as_select.json | 16 ++- .../test_expand_select_star_basic.json | 80 ++++++++++-- .../goldens/test_insert_as_select.json | 36 +++++- ...est_select_ambiguous_column_no_schema.json | 12 +- 
.../goldens/test_select_count.json | 8 +- .../test_select_from_struct_subfields.json | 16 ++- .../goldens/test_select_from_union.json | 16 ++- .../sql_parsing/goldens/test_select_max.json | 4 +- .../goldens/test_select_with_ctes.json | 8 +- .../test_select_with_full_col_name.json | 12 +- .../test_snowflake_case_statement.json | 16 ++- .../goldens/test_snowflake_column_cast.json | 63 ++++++++++ .../test_snowflake_column_normalization.json | 32 ++++- ...t_snowflake_ctas_column_normalization.json | 32 ++++- .../test_snowflake_default_normalization.json | 48 ++++++- .../unit/sql_parsing/test_sqlglot_lineage.py | 21 ++++ 24 files changed, 604 insertions(+), 148 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 81c43884fdf7d..349eb40a5e865 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -5,12 +5,13 @@ import logging import pathlib from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import pydantic.dataclasses import sqlglot import sqlglot.errors import sqlglot.lineage +import sqlglot.optimizer.annotate_types import sqlglot.optimizer.qualify import sqlglot.optimizer.qualify_columns from pydantic import BaseModel @@ -23,7 +24,17 @@ from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier -from datahub.metadata.schema_classes import OperationTypeClass, SchemaMetadataClass +from datahub.metadata.schema_classes import ( + ArrayTypeClass, + BooleanTypeClass, + DateTypeClass, + NumberTypeClass, + OperationTypeClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StringTypeClass, + TimeTypeClass, +) from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict from datahub.utilities.urns.dataset_urn import DatasetUrn @@ -90,8 +101,18 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType: return QueryType.UNKNOWN +class _ParserBaseModel( + BaseModel, + arbitrary_types_allowed=True, + json_encoders={ + SchemaFieldDataTypeClass: lambda v: v.to_obj(), + }, +): + pass + + @functools.total_ordering -class _FrozenModel(BaseModel, frozen=True): +class _FrozenModel(_ParserBaseModel, frozen=True): def __lt__(self, other: "_FrozenModel") -> bool: for field in self.__fields__: self_v = getattr(self, field) @@ -146,29 +167,42 @@ class _ColumnRef(_FrozenModel): column: str -class ColumnRef(BaseModel): +class ColumnRef(_ParserBaseModel): table: Urn column: str -class _DownstreamColumnRef(BaseModel): +class _DownstreamColumnRef(_ParserBaseModel): table: Optional[_TableName] column: str + column_type: Optional[sqlglot.exp.DataType] -class DownstreamColumnRef(BaseModel): +class DownstreamColumnRef(_ParserBaseModel): table: Optional[Urn] column: str + column_type: Optional[SchemaFieldDataTypeClass] + native_column_type: Optional[str] + + @pydantic.validator("column_type", pre=True) + def _load_column_type( + cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]] + ) -> Optional[SchemaFieldDataTypeClass]: + if v is None: + return None + if isinstance(v, SchemaFieldDataTypeClass): + return v + return 
SchemaFieldDataTypeClass.from_obj(v) -class _ColumnLineageInfo(BaseModel): +class _ColumnLineageInfo(_ParserBaseModel): downstream: _DownstreamColumnRef upstreams: List[_ColumnRef] logic: Optional[str] -class ColumnLineageInfo(BaseModel): +class ColumnLineageInfo(_ParserBaseModel): downstream: DownstreamColumnRef upstreams: List[ColumnRef] @@ -176,7 +210,7 @@ class ColumnLineageInfo(BaseModel): logic: Optional[str] = pydantic.Field(default=None, exclude=True) -class SqlParsingDebugInfo(BaseModel, arbitrary_types_allowed=True): +class SqlParsingDebugInfo(_ParserBaseModel): confidence: float = 0.0 tables_discovered: int = 0 @@ -190,7 +224,7 @@ def error(self) -> Optional[Exception]: return self.table_error or self.column_error -class SqlParsingResult(BaseModel): +class SqlParsingResult(_ParserBaseModel): query_type: QueryType = QueryType.UNKNOWN in_tables: List[Urn] @@ -541,6 +575,15 @@ def _schema_aware_fuzzy_column_resolve( ) from e logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) + # Try to figure out the types of the output columns. + try: + statement = sqlglot.optimizer.annotate_types.annotate_types( + statement, schema=sqlglot_db_schema + ) + except sqlglot.errors.OptimizeError as e: + # This is not a fatal error, so we can continue. + logger.debug("sqlglot failed to annotate types: %s", e) + column_lineage = [] try: @@ -553,7 +596,6 @@ def _schema_aware_fuzzy_column_resolve( logger.debug("output columns: %s", [col[0] for col in output_columns]) output_col: str for output_col, original_col_expression in output_columns: - # print(f"output column: {output_col}") if output_col == "*": # If schema information is available, the * will be expanded to the actual columns. # Otherwise, we can't process it. @@ -613,12 +655,19 @@ def _schema_aware_fuzzy_column_resolve( output_col = _schema_aware_fuzzy_column_resolve(output_table, output_col) + # Guess the output column type. 
+ output_col_type = None + if original_col_expression.type: + output_col_type = original_col_expression.type + if not direct_col_upstreams: logger.debug(f' "{output_col}" has no upstreams') column_lineage.append( _ColumnLineageInfo( downstream=_DownstreamColumnRef( - table=output_table, column=output_col + table=output_table, + column=output_col, + column_type=output_col_type, ), upstreams=sorted(direct_col_upstreams), # logic=column_logic.sql(pretty=True, dialect=dialect), @@ -673,6 +722,42 @@ def _try_extract_select( return statement +def _translate_sqlglot_type( + sqlglot_type: sqlglot.exp.DataType.Type, +) -> Optional[SchemaFieldDataTypeClass]: + TypeClass: Any + if sqlglot_type in sqlglot.exp.DataType.TEXT_TYPES: + TypeClass = StringTypeClass + elif sqlglot_type in sqlglot.exp.DataType.NUMERIC_TYPES or sqlglot_type in { + sqlglot.exp.DataType.Type.DECIMAL, + }: + TypeClass = NumberTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.BOOLEAN, + sqlglot.exp.DataType.Type.BIT, + }: + TypeClass = BooleanTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.DATE, + }: + TypeClass = DateTypeClass + elif sqlglot_type in sqlglot.exp.DataType.TEMPORAL_TYPES: + TypeClass = TimeTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.ARRAY, + }: + TypeClass = ArrayTypeClass + elif sqlglot_type in { + sqlglot.exp.DataType.Type.UNKNOWN, + }: + return None + else: + logger.debug("Unknown sqlglot type: %s", sqlglot_type) + return None + + return SchemaFieldDataTypeClass(type=TypeClass()) + + def _translate_internal_column_lineage( table_name_urn_mapping: Dict[_TableName, str], raw_column_lineage: _ColumnLineageInfo, @@ -684,6 +769,16 @@ def _translate_internal_column_lineage( downstream=DownstreamColumnRef( table=downstream_urn, column=raw_column_lineage.downstream.column, + column_type=_translate_sqlglot_type( + raw_column_lineage.downstream.column_type.this + ) + if raw_column_lineage.downstream.column_type + else None, + native_column_type=raw_column_lineage.downstream.column_type.sql() + if raw_column_lineage.downstream.column_type + and raw_column_lineage.downstream.column_type.this + != sqlglot.exp.DataType.Type.UNKNOWN + else None, ), upstreams=[ ColumnRef( diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index e3cc6c8101650..b6cb578217a2c 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -17,7 +17,6 @@ ) from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage -from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef pytestmark = pytest.mark.integration_batch_2 @@ -742,75 +741,25 @@ def test_sqlglot_parser(): == "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)" ) - assert lineage[0].column_lineage == [ - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="client_director"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="tier"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column='upper("manager")'), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="team_type"), - 
upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="date_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="monthid"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="target_team"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="seller_email"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="agent_key"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="sme_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="revenue_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="service_quota"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="bl_target"), - upstreams=[], - logic=None, - ), - ColumnLineageInfo( - downstream=DownstreamColumnRef(table=None, column="software_quota"), - upstreams=[], - logic=None, - ), + # TODO: None of these columns have upstreams? + # That doesn't seem right - we probably need to add fake schemas for the two tables above. + cols = [ + "client_director", + "tier", + 'upper("manager")', + "team_type", + "date_target", + "monthid", + "target_team", + "seller_email", + "agent_key", + "sme_quota", + "revenue_quota", + "service_quota", + "bl_target", + "software_quota", ] + for i, column in enumerate(cols): + assert lineage[0].column_lineage[i].downstream.table is None + assert lineage[0].column_lineage[i].downstream.column == column + assert lineage[0].column_lineage[i].upstreams == [] diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index e50d944ce72e3..f0175b4dc8892 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -12,7 +12,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col5" + "column": "col5", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -24,7 +30,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -36,7 +48,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -48,7 +66,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)", - "column": "col3" + "column": "col3", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git 
a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index 78591286feb50..b7df5444987f2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 0e93d31fbb6a6..67e306bebf545 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index 78591286feb50..b7df5444987f2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index 17a801a63e3ff..b393b2445d6c4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -10,7 +10,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)", - "column": "something" + "column": "something", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index fd8a586ac74ac..53fb94300e804 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col1" + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -27,7 +33,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)", - "column": "col2" + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index 1ca56840531e4..ff452467aa5bd 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -10,7 +10,9 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Department" + "column": "Department", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -22,14 +24,22 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Employees" + "column": "Employees", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)", - "column": "Salary" + "column": "Salary", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json index e241bdd08e243..eecb2265eaec5 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "orderkey" + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + 
"native_column_type": "DECIMAL" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "custkey" + "column": "custkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "orderstatus" + "column": "orderstatus", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -56,7 +80,13 @@ { "downstream": { "table": null, - "column": "totalprice" + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -68,7 +98,13 @@ { "downstream": { "table": null, - "column": "orderdate" + "column": "orderdate", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -80,7 +116,13 @@ { "downstream": { "table": null, - "column": "orderpriority" + "column": "orderpriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -92,7 +134,13 @@ { "downstream": { "table": null, - "column": "clerk" + "column": "clerk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { @@ -104,7 +152,13 @@ { "downstream": { "table": null, - "column": "shippriority" + "column": "shippriority", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -116,7 +170,13 @@ { "downstream": { "table": null, - "column": "comment" + "column": "comment", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json index d7264fd2db6b2..326db47e7ab33 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json @@ -18,21 +18,27 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "i_item_desc" + "column": "i_item_desc", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "w_warehouse_name" + "column": "w_warehouse_name", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "d_week_seq" + "column": "d_week_seq", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -44,7 +50,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "no_promo" + "column": "no_promo", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -56,7 +68,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "promo" + "column": "promo", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -68,7 +86,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)", - "column": "total_cnt" + "column": "total_cnt", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json index 10f5ee20b0c1f..b5fd5eebeb1b1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json @@ -9,21 +9,27 @@ { "downstream": { "table": null, - "column": "a" + "column": "a", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "b" + "column": "b", + "column_type": null, + "native_column_type": null }, "upstreams": [] }, { "downstream": { "table": null, - "column": "c" + "column": "c", + "column_type": null, + "native_column_type": null }, "upstreams": [] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json index 9f6eeae46c294..a67c944822138 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)" + "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 109de96180422..5ad847e252497 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -32,7 +40,9 @@ { "downstream": { "table": null, - "column": "min_metric" + "column": "min_metric", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json index 2340b2e95b0d0..902aa010c8afc 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json @@ -9,14 +9,26 @@ { "downstream": { "table": null, - "column": "label" + "column": "label", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + 
"native_column_type": "VARCHAR" }, "upstreams": [] }, { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json index 326c07d332c26..6ea88f45847ce 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json @@ -8,7 +8,9 @@ { "downstream": { "table": null, - "column": "max_col" + "column": "max_col", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json index 3e02314d6e8c3..67e9fd2d21a0e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json @@ -9,7 +9,9 @@ { "downstream": { "table": null, - "column": "COL1" + "column": "COL1", + "column_type": null, + "native_column_type": null }, "upstreams": [ { @@ -21,7 +23,9 @@ { "downstream": { "table": null, - "column": "COL3" + "column": "COL3", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index c12ad23b2f03b..6ee3d2e61c39b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "post_id" + "column": "post_id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -20,7 +26,9 @@ { "downstream": { "table": null, - "column": "id" + "column": "id", + "column_type": null, + "native_column_type": null }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json index 64cd80e9a2d69..a876824127ec1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_price_category" + "column": "total_price_category", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_price_success" + "column": "total_price_success", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json new file mode 100644 index 0000000000000..7545e2b3269dc --- /dev/null +++ 
b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json @@ -0,0 +1,63 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(20, 0)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_orderkey" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_int", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + }, + { + "downstream": { + "table": null, + "column": "total_cast_float", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(16, 4)" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "o_totalprice" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json index 7b22a46757e39..84e6b053000f1 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json @@ -8,7 +8,13 @@ { "downstream": { "table": null, - "column": "total_agg" + "column": "total_agg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -20,7 +26,13 @@ { "downstream": { "table": null, - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -32,7 +44,13 @@ { "downstream": { "table": null, - "column": "total_min" + "column": "total_min", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -44,7 +62,13 @@ { "downstream": { "table": null, - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json index c912d99a3a8a3..39c94cf83c561 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json @@ -10,7 +10,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "Total_Agg" + "column": "Total_Agg", + "column_type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -22,7 +28,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_avg" + "column": "total_avg", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DOUBLE" }, "upstreams": [ { @@ -34,7 +46,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "TOTAL_MIN" + "column": "TOTAL_MIN", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { @@ -46,7 +64,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)", - "column": "total_max" + "column": "total_max", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "FLOAT" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json index 2af308ec60623..dbf5b1b9a4453 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json @@ -11,7 +11,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "user_fk" + "column": "user_fk", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL(38, 0)" }, "upstreams": [ { @@ -23,7 +29,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "email" + "column": "email", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR(16777216)" }, "upstreams": [ { @@ -35,7 +47,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "last_purchase_date" + "column": "last_purchase_date", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "native_column_type": "DATE" }, "upstreams": [ { @@ -47,7 +65,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_amount" + "column": "lifetime_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { @@ -59,7 +83,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": "lifetime_purchase_count" + "column": "lifetime_purchase_count", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "BIGINT" }, "upstreams": [ { @@ -71,7 +101,13 @@ { "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)", - "column": 
"average_purchase_amount" + "column": "average_purchase_amount", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 2a965a9bb1e61..bb6e5f1581754 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -608,4 +608,25 @@ def test_snowflake_default_normalization(): ) +def test_snowflake_column_cast(): + assert_sql_result( + """ +SELECT + o.o_orderkey::NUMBER(20,0) as orderkey, + CAST(o.o_totalprice AS INT) as total_cast_int, + CAST(o.o_totalprice AS NUMBER(16,4)) as total_cast_float +FROM snowflake_sample_data.tpch_sf1.orders o +LIMIT 10 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_column_cast.json", + ) + + # TODO: Add a test for setting platform_instance or env From dd418de76d96fb41c9064261cdba37bc2af85309 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 12 Oct 2023 13:10:59 +0200 Subject: [PATCH 120/156] fix(ingest/bigquery): Fix shard regexp to match without underscore as well (#8934) --- .../ingestion/source/bigquery_v2/bigquery.py | 1 + .../source/bigquery_v2/bigquery_audit.py | 27 ++++++++++++++----- .../ingestion/source/bigquery_v2/queries.py | 8 +++--- .../ingestion/source_config/bigquery.py | 8 +++++- .../tests/unit/test_bigquery_source.py | 10 ++++--- .../unit/test_bigqueryv2_usage_source.py | 4 +-- 6 files changed, 41 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index b4a04d96b532b..e577c2bac8bbd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1057,6 +1057,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: ): field.description = col.comment schema_fields[idx] = field + break else: tags = [] if col.is_partition_column: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index b0ac77201b415..88060a9cdc91d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -20,7 +20,13 @@ logger: logging.Logger = logging.getLogger(__name__) -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. 
+_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) @dataclass(frozen=True, order=True) @@ -40,7 +46,7 @@ class BigqueryTableIdentifier: _BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd" @staticmethod - def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: + def get_table_and_shard(table_name: str) -> Tuple[Optional[str], Optional[str]]: """ Args: table_name: @@ -53,16 +59,25 @@ def get_table_and_shard(table_name: str) -> Tuple[str, Optional[str]]: In case of non-sharded tables, returns (, None) In case of sharded tables, returns (, shard) """ + new_table_name = table_name match = re.match( BigqueryTableIdentifier._BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX, table_name, re.IGNORECASE, ) if match: - table_name = match.group(2) - shard = match.group(3) - return table_name, shard - return table_name, None + shard: str = match[3] + if shard: + if table_name.endswith(shard): + new_table_name = table_name[: -len(shard)] + + new_table_name = ( + new_table_name.rstrip("_") if new_table_name else new_table_name + ) + if new_table_name.endswith("."): + new_table_name = table_name + return (new_table_name, shard) if new_table_name else (None, shard) + return new_table_name, None @classmethod def from_string_name(cls, table: str) -> "BigqueryTableIdentifier": diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index a87cb8c1cbfa5..67fcc33cdf218 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -51,8 +51,8 @@ class BigqueryQuery: p.max_partition_id, p.active_billable_bytes, p.long_term_billable_bytes, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t @@ -92,8 +92,8 @@ class BigqueryQuery: tos.OPTION_VALUE as comment, t.is_insertable_into, t.ddl, - REGEXP_EXTRACT(t.table_name, r".*_(\\d+)$") as table_suffix, - REGEXP_REPLACE(t.table_name, r"_(\\d+)$", "") as table_base + REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix, + REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base FROM `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py index 8ca1296d819c1..0a73bb5203e72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/bigquery.py @@ -4,7 +4,13 @@ from datahub.configuration.common import ConfigModel, ConfigurationError -_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = "((.+)[_$])?(\\d{8})$" +# Regexp for sharded tables. +# A sharded table is a table that has a suffix of the form _yyyymmdd or yyyymmdd, where yyyymmdd is a date. +# The regexp checks for valid dates in the suffix (e.g. 
20200101, 20200229, 20201231) and if the date is not valid +# then it is not a sharded table. +_BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: str = ( + "((.+\\D)[_$]?)?(\\d\\d\\d\\d(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01]))$" +) class BigQueryBaseConfig(ConfigModel): diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index e9e91361f49f4..5a11a933c8595 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -765,11 +765,14 @@ def test_gen_view_dataset_workunits( ("project.dataset.table_20231215", "project.dataset.table", "20231215"), ("project.dataset.table_2023", "project.dataset.table_2023", None), # incorrectly handled special case where dataset itself is a sharded table if full name is specified - ("project.dataset.20231215", "project.dataset.20231215", None), + ("project.dataset.20231215", "project.dataset.20231215", "20231215"), + ("project1.dataset2.20231215", "project1.dataset2.20231215", "20231215"), # Cases with Just the table name as input ("table", "table", None), - ("table20231215", "table20231215", None), + ("table20231215", "table", "20231215"), ("table_20231215", "table", "20231215"), + ("table2_20231215", "table2", "20231215"), + ("table220231215", "table220231215", None), ("table_1624046611000_name", "table_1624046611000_name", None), ("table_1624046611000", "table_1624046611000", None), # Special case where dataset itself is a sharded table @@ -801,7 +804,6 @@ def test_get_table_and_shard_default( ("project.dataset.2023", "project.dataset.2023", None), # Cases with Just the table name as input ("table", "table", None), - ("table20231215", "table20231215", None), ("table_20231215", "table", "20231215"), ("table_2023", "table", "2023"), ("table_1624046611000_name", "table_1624046611000_name", None), @@ -842,7 +844,7 @@ def test_get_table_and_shard_custom_shard_pattern( "project.dataset.table_1624046611000_name", ), ("project.dataset.table_1624046611000", "project.dataset.table_1624046611000"), - ("project.dataset.table20231215", "project.dataset.table20231215"), + ("project.dataset.table20231215", "project.dataset.table"), ("project.dataset.table_*", "project.dataset.table"), ("project.dataset.table_2023*", "project.dataset.table"), ("project.dataset.table_202301*", "project.dataset.table"), diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py index 4cf42da4395f9..44fd840f28d59 100644 --- a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py +++ b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py @@ -144,10 +144,10 @@ def test_bigquery_table_sanitasitation(): assert new_table_ref.dataset == "dataset-4567" table_ref = BigQueryTableRef( - BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20222110") + BigqueryTableIdentifier("project-1234", "dataset-4567", "foo_20221210") ) new_table_identifier = table_ref.table_identifier - assert new_table_identifier.table == "foo_20222110" + assert new_table_identifier.table == "foo_20221210" assert new_table_identifier.is_sharded_table() assert new_table_identifier.get_table_display_name() == "foo" assert new_table_identifier.project_id == "project-1234" From c381806110ae995dd2164305394ee4e1d131e033 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 12 Oct 2023 13:56:30 +0200 Subject: [PATCH 121/156] feat(ingestion): Adding config option to auto lowercase dataset urns (#8928) --- 
.../datahub/configuration/source_common.py | 7 ++ .../src/datahub/ingestion/api/source.py | 24 +++++++ .../datahub/ingestion/api/source_helpers.py | 20 +++++- .../ingestion/source/bigquery_v2/bigquery.py | 3 - .../source/bigquery_v2/bigquery_config.py | 5 -- .../src/datahub/ingestion/source/kafka.py | 11 ++- .../ingestion/source/sql/sql_config.py | 11 ++- .../datahub/ingestion/source/unity/config.py | 6 +- .../src/datahub/utilities/urns/urn_iter.py | 33 +++++++-- .../api/source_helpers/test_source_helpers.py | 70 +++++++++++++++++++ 10 files changed, 170 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index a9f891ddb7b1e..80b6ceb576c1c 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -54,6 +54,13 @@ class DatasetSourceConfigMixin(PlatformInstanceConfigMixin, EnvConfigMixin): """ +class LowerCaseDatasetUrnConfigMixin(ConfigModel): + convert_urns_to_lowercase: bool = Field( + default=False, + description="Whether to convert dataset urns to lowercase.", + ) + + class DatasetLineageProviderConfigBase(EnvConfigMixin): """ Any non-Dataset source that produces lineage to Datasets should inherit this class. diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 0bcc220cad49b..b86844b1c4c83 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -29,6 +29,7 @@ from datahub.ingestion.api.report import Report from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, + auto_lowercase_urns, auto_materialize_referenced_tags, auto_status_aspect, auto_workunit_reporter, @@ -192,7 +193,30 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: self.ctx.pipeline_config.flags.generate_browse_path_v2_dry_run ) + auto_lowercase_dataset_urns: Optional[MetadataWorkUnitProcessor] = None + if ( + self.ctx.pipeline_config + and self.ctx.pipeline_config.source + and self.ctx.pipeline_config.source.config + and ( + ( + hasattr( + self.ctx.pipeline_config.source.config, + "convert_urns_to_lowercase", + ) + and self.ctx.pipeline_config.source.config.convert_urns_to_lowercase + ) + or ( + hasattr(self.ctx.pipeline_config.source.config, "get") + and self.ctx.pipeline_config.source.config.get( + "convert_urns_to_lowercase" + ) + ) + ) + ): + auto_lowercase_dataset_urns = auto_lowercase_urns return [ + auto_lowercase_dataset_urns, auto_status_aspect, auto_materialize_referenced_tags, browse_path_processor, diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 7fc15cf829678..2ce9e07bc57bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -35,7 +35,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.tag_urn import TagUrn from datahub.utilities.urns.urn import guess_entity_type -from datahub.utilities.urns.urn_iter import list_urns +from datahub.utilities.urns.urn_iter import list_urns, lowercase_dataset_urns if TYPE_CHECKING: from datahub.ingestion.api.source import SourceReport @@ -70,7 +70,6 @@ def auto_status_aspect( for wu in stream: urn = wu.get_urn() all_urns.add(urn) - if not 
wu.is_primary_source: # If this is a non-primary source, we pretend like we've seen the status # aspect so that we don't try to emit a removal for it. @@ -173,6 +172,23 @@ def auto_materialize_referenced_tags( ).as_workunit() +def auto_lowercase_urns( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """Lowercase all dataset urns""" + + for wu in stream: + try: + old_urn = wu.get_urn() + lowercase_dataset_urns(wu.metadata) + wu.id = wu.id.replace(old_urn, wu.get_urn()) + + yield wu + except Exception as e: + logger.warning(f"Failed to lowercase urns for {wu}: {e}", exc_info=True) + yield wu + + def auto_browse_path_v2( stream: Iterable[MetadataWorkUnit], *, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index e577c2bac8bbd..552612f877b9a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -16,7 +16,6 @@ make_dataplatform_instance_urn, make_dataset_urn, make_tag_urn, - set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey @@ -218,8 +217,6 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): if self.config.enable_legacy_sharded_table_support: BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "" - set_dataset_urn_to_lower(self.config.convert_urns_to_lowercase) - self.bigquery_data_dictionary = BigQuerySchemaApi( self.report.schema_api_perf, self.config.get_bigquery_client() ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 483355a85ac05..944814b6936a4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -206,11 +206,6 @@ def validate_column_lineage(cls, v: bool, values: Dict[str, Any]) -> bool: description="This flag enables the data lineage extraction from Data Lineage API exposed by Google Data Catalog. NOTE: This extractor can't build views lineage. It's recommended to enable the view's DDL parsing. 
Read the docs to have more information about: https://cloud.google.com/data-catalog/docs/concepts/about-data-lineage", ) - convert_urns_to_lowercase: bool = Field( - default=False, - description="Convert urns to lowercase.", - ) - enable_legacy_sharded_table_support: bool = Field( default=True, description="Use the legacy sharded table urn suffix added.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 566304e1999b7..d5039360da567 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -18,7 +18,10 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.kafka import KafkaConsumerConnectionConfig -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.emitter import mce_builder from datahub.emitter.mce_builder import ( make_data_platform_urn, @@ -76,7 +79,11 @@ class KafkaTopicConfigKeys(str, Enum): UNCLEAN_LEADER_ELECTION_CONFIG = "unclean.leader.election.enable" -class KafkaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class KafkaSourceConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +): connection: KafkaConsumerConnectionConfig = KafkaConsumerConnectionConfig() topic_patterns: AllowDenyPattern = AllowDenyPattern(allow=[".*"], deny=["^_.*"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 677d32c8bac08..08cc74aec3977 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -7,7 +7,10 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +) from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -21,7 +24,11 @@ logger: logging.Logger = logging.getLogger(__name__) -class SQLCommonConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): +class SQLCommonConfig( + StatefulIngestionConfigBase, + DatasetSourceConfigMixin, + LowerCaseDatasetUrnConfigMixin, +): options: dict = pydantic.Field( default_factory=dict, description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 51390873712d3..a57ee39848855 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -7,7 +7,10 @@ from pydantic import Field from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import ( + DatasetSourceConfigMixin, + 
LowerCaseDatasetUrnConfigMixin, +) from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.configuration.validate_field_rename import pydantic_renamed_field from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -91,6 +94,7 @@ class UnityCatalogSourceConfig( BaseUsageConfig, DatasetSourceConfigMixin, StatefulProfilingConfigMixin, + LowerCaseDatasetUrnConfigMixin, ): token: str = pydantic.Field(description="Databricks personal access token") workspace_url: str = pydantic.Field( diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index 261f95331af61..e13d439161064 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -3,7 +3,11 @@ from avro.schema import Field, RecordSchema from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.metadata.schema_classes import DictWrapper +from datahub.metadata.schema_classes import ( + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, +) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn, guess_entity_type @@ -32,7 +36,7 @@ def list_urns_with_path( if isinstance(model, MetadataChangeProposalWrapper): if model.entityUrn: - urns.append((model.entityUrn, ["urn"])) + urns.append((model.entityUrn, ["entityUrn"])) if model.entityKeyAspect: urns.extend( _add_prefix_to_paths( @@ -83,7 +87,15 @@ def list_urns(model: Union[DictWrapper, MetadataChangeProposalWrapper]) -> List[ return [urn for urn, _ in list_urns_with_path(model)] -def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: +def transform_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ], + func: Callable[[str], str], +) -> None: """ Rewrites all URNs in the given object according to the given function. 
""" @@ -95,7 +107,9 @@ def transform_urns(model: DictWrapper, func: Callable[[str], str]) -> None: def _modify_at_path( - model: Union[DictWrapper, list], path: _Path, new_value: str + model: Union[DictWrapper, MetadataChangeProposalWrapper, list], + path: _Path, + new_value: str, ) -> None: assert len(path) > 0 @@ -103,6 +117,8 @@ def _modify_at_path( if isinstance(path[0], int): assert isinstance(model, list) model[path[0]] = new_value + elif isinstance(model, MetadataChangeProposalWrapper): + setattr(model, path[0], new_value) else: assert isinstance(model, DictWrapper) model._inner_dict[path[0]] = new_value @@ -120,7 +136,14 @@ def _lowercase_dataset_urn(dataset_urn: str) -> str: return str(cur_urn) -def lowercase_dataset_urns(model: DictWrapper) -> None: +def lowercase_dataset_urns( + model: Union[ + DictWrapper, + MetadataChangeEventClass, + MetadataChangeProposalClass, + MetadataChangeProposalWrapper, + ] +) -> None: def modify_urn(urn: str) -> str: if guess_entity_type(urn) == "dataset": return _lowercase_dataset_urn(urn) diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index b6ec6ebce240c..b667af8bb41e9 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -16,6 +16,7 @@ from datahub.ingestion.api.source_helpers import ( auto_browse_path_v2, auto_empty_dataset_usage_statistics, + auto_lowercase_urns, auto_status_aspect, auto_workunit, ) @@ -275,6 +276,75 @@ def test_auto_browse_path_v2_legacy_browse_path(telemetry_ping_mock): assert paths["platform,dataset-2,PROD)"] == _make_browse_path_entries(["something"]) +def test_auto_lowercase_aspects(): + mcws = auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn=make_dataset_urn( + "bigquery", "myProject.mySchema.myTable", "PROD" + ), + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", "myProject.mySchema.myTable", "PROD" + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-Public-Data.Covid19_Aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + + expected = [ + *list( + auto_workunit( + [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,myproject.myschema.mytable,PROD)", + aspect=models.DatasetKeyClass( + "urn:li:dataPlatform:bigquery", + "myProject.mySchema.myTable", + "PROD", + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.ContainerPropertiesClass( + name="test", + ), + ), + models.MetadataChangeEventClass( + proposedSnapshot=models.DatasetSnapshotClass( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_aha.staffing,PROD)", + aspects=[ + models.DatasetPropertiesClass( + customProperties={ + "key": "value", + }, + ), + ], + ), + ), + ] + ) + ), + ] + assert list(auto_lowercase_urns(mcws)) == expected + + @patch("datahub.ingestion.api.source_helpers.telemetry.telemetry_instance.ping") def test_auto_browse_path_v2_container_over_legacy_browse_path(telemetry_ping_mock): structure = {"a": {"b": ["c"]}} 
From 8813ae2fb15a1f80d5f0ef433fce1f84e1a240b5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Oct 2023 07:58:10 -0400 Subject: [PATCH 122/156] feat(ingest/s3): support .gzip and fix decompression bug (#8990) --- .../ingestion/source/data_lake_common/path_spec.py | 9 ++++++++- .../src/datahub/ingestion/source/s3/source.py | 8 +++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py index d1c949f48e2cd..a35fb94614f72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py @@ -18,7 +18,14 @@ logger: logging.Logger = logging.getLogger(__name__) SUPPORTED_FILE_TYPES: List[str] = ["csv", "tsv", "json", "parquet", "avro"] -SUPPORTED_COMPRESSIONS: List[str] = ["gz", "bz2"] + +# These come from the smart_open library. +SUPPORTED_COMPRESSIONS: List[str] = [ + "gz", + "bz2", + # We have a monkeypatch on smart_open that aliases .gzip to .gz. + "gzip", +] class PathSpec(ConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index ac4433b7eb1f0..eb49fcbb268c0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -10,6 +10,7 @@ from pathlib import PurePath from typing import Any, Dict, Iterable, List, Optional, Tuple +import smart_open.compression as so_compression from more_itertools import peekable from pyspark.conf import SparkConf from pyspark.sql import SparkSession @@ -120,6 +121,9 @@ } PAGE_SIZE = 1000 +# Hack to support the .gzip extension with smart_open. +so_compression.register_compressor(".gzip", so_compression._COMPRESSOR_REGISTRY[".gz"]) + def get_column_type( report: SourceReport, dataset_name: str, column_type: str @@ -407,7 +411,9 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: table_data.full_path, "rb", transport_params={"client": s3_client} ) else: - file = open(table_data.full_path, "rb") + # We still use smart_open here to take advantage of the compression + # capabilities of smart_open. 
+ file = smart_open(table_data.full_path, "rb") fields = [] From f6e131206394e1f56e4f966689c8abd1e8641919 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 12 Oct 2023 18:43:14 +0100 Subject: [PATCH 123/156] feat(ingestion): Adds support for memory profiling (#8856) Co-authored-by: Harshal Sheth --- docs-website/sidebars.js | 1 + .../docs/dev_guides/profiling_ingestions.md | 55 +++++++ metadata-ingestion/setup.py | 5 + .../src/datahub/ingestion/run/pipeline.py | 148 ++++++++++-------- .../datahub/ingestion/run/pipeline_config.py | 7 + 5 files changed, 148 insertions(+), 68 deletions(-) create mode 100644 metadata-ingestion/docs/dev_guides/profiling_ingestions.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index bdf3926c17e0d..21b3a1d3fe4d3 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -140,6 +140,7 @@ module.exports = { "metadata-ingestion/docs/dev_guides/classification", "metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source", "metadata-ingestion/docs/dev_guides/sql_profiles", + "metadata-ingestion/docs/dev_guides/profiling_ingestions", ], }, ], diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md new file mode 100644 index 0000000000000..d876d99b494f8 --- /dev/null +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -0,0 +1,55 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Profiling ingestions + + + +**🤝 Version compatibility** +> Open Source DataHub: **0.11.1** | Acryl: **0.2.12** + +This page documents how to perform memory profiles of ingestion runs. +It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources. + +## How to use +Install the `debug` plugin for DataHub's CLI wherever the ingestion runs: + +```bash +pip install 'acryl-datahub[debug]' +``` + +This will install [memray](https://github.com/bloomberg/memray) in your python environment. + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +Once the ingestion run starts a binary file will be created and appended to during the execution of the ingestion. + +These files follow the pattern `file-.bin` for a unique identification. +Once the ingestion has finished you can use `memray` to analyze the memory dump in a flamegraph view using: + +```$ memray flamegraph file-None-file-2023_09_18-21_38_43.bin``` + +This will generate an interactive HTML file for analysis: + +

+ +

+ + +`memray` has an extensive set of features for memory investigation. Take a look at their [documentation](https://bloomberg.github.io/memray/overview.html) to see the full feature set. + + +## Questions + +If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://slack.datahubproject.io/)! diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index fe8e3be4632c4..61e7b684682a4 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -431,6 +431,10 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} +debug_requirements = { + "memray" +} + base_dev_requirements = { *base_requirements, *framework_common, @@ -723,5 +727,6 @@ "dev": list(dev_requirements), "testing-utils": list(test_api_requirements), # To import `datahub.testing` "integration-tests": list(full_test_dev_requirements), + "debug": list(debug_requirements), }, ) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 79d959965e0dd..07b55e0e25a89 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -353,77 +353,89 @@ def _time_to_print(self) -> bool: return False def run(self) -> None: - self.final_status = "unknown" - self._notify_reporters_on_ingestion_start() - callback = None - try: - callback = ( - LoggingCallback() - if not self.config.failure_log.enabled - else DeadLetterQueueCallback( - self.ctx, self.config.failure_log.log_config - ) - ) - for wu in itertools.islice( - self.source.get_workunits(), - self.preview_workunits if self.preview_mode else None, - ): - try: - if self._time_to_print(): - self.pretty_print_summary(currently_running=True) - except Exception as e: - logger.warning(f"Failed to print summary {e}") - - if not self.dry_run: - self.sink.handle_work_unit_start(wu) - try: - record_envelopes = self.extractor.get_records(wu) - for record_envelope in self.transform(record_envelopes): - if not self.dry_run: - self.sink.write_record_async(record_envelope, callback) - - except RuntimeError: - raise - except SystemExit: - raise - except Exception as e: - logger.error( - "Failed to process some records. Continuing.", exc_info=e + with contextlib.ExitStack() as stack: + if self.config.flags.generate_memory_profiles: + import memray + + stack.enter_context( + memray.Tracker( + f"{self.config.flags.generate_memory_profiles}/{self.config.run_id}.bin" ) - # TODO: Transformer errors should cause the pipeline to fail. 
- - self.extractor.close() - if not self.dry_run: - self.sink.handle_work_unit_end(wu) - self.source.close() - # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state - for record_envelope in self.transform( - [ - RecordEnvelope( - record=EndOfStream(), metadata={"workunit_id": "end-of-stream"} + ) + + self.final_status = "unknown" + self._notify_reporters_on_ingestion_start() + callback = None + try: + callback = ( + LoggingCallback() + if not self.config.failure_log.enabled + else DeadLetterQueueCallback( + self.ctx, self.config.failure_log.log_config ) - ] - ): - if not self.dry_run and not isinstance( - record_envelope.record, EndOfStream + ) + for wu in itertools.islice( + self.source.get_workunits(), + self.preview_workunits if self.preview_mode else None, + ): + try: + if self._time_to_print(): + self.pretty_print_summary(currently_running=True) + except Exception as e: + logger.warning(f"Failed to print summary {e}") + + if not self.dry_run: + self.sink.handle_work_unit_start(wu) + try: + record_envelopes = self.extractor.get_records(wu) + for record_envelope in self.transform(record_envelopes): + if not self.dry_run: + self.sink.write_record_async(record_envelope, callback) + + except RuntimeError: + raise + except SystemExit: + raise + except Exception as e: + logger.error( + "Failed to process some records. Continuing.", + exc_info=e, + ) + # TODO: Transformer errors should cause the pipeline to fail. + + self.extractor.close() + if not self.dry_run: + self.sink.handle_work_unit_end(wu) + self.source.close() + # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state + for record_envelope in self.transform( + [ + RecordEnvelope( + record=EndOfStream(), + metadata={"workunit_id": "end-of-stream"}, + ) + ] ): - # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. - self.sink.write_record_async(record_envelope, callback) - - self.sink.close() - self.process_commits() - self.final_status = "completed" - except (SystemExit, RuntimeError, KeyboardInterrupt) as e: - self.final_status = "cancelled" - logger.error("Caught error", exc_info=e) - raise - finally: - clear_global_warnings() - - if callback and hasattr(callback, "close"): - callback.close() # type: ignore - - self._notify_reporters_on_ingestion_completion() + if not self.dry_run and not isinstance( + record_envelope.record, EndOfStream + ): + # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. 
+ self.sink.write_record_async(record_envelope, callback) + + self.sink.close() + self.process_commits() + self.final_status = "completed" + except (SystemExit, RuntimeError, KeyboardInterrupt) as e: + self.final_status = "cancelled" + logger.error("Caught error", exc_info=e) + raise + finally: + clear_global_warnings() + + if callback and hasattr(callback, "close"): + callback.close() # type: ignore + + self._notify_reporters_on_ingestion_completion() def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]: """ diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py index ff9a7a6f3d146..da3cee8ad9c1b 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline_config.py @@ -57,6 +57,13 @@ class FlagsConfig(ConfigModel): ), ) + generate_memory_profiles: Optional[str] = Field( + default=None, + description=( + "Generate memray memory dumps for ingestion process by providing a path to write the dump file in." + ), + ) + class PipelineConfig(ConfigModel): # Once support for discriminated unions gets merged into Pydantic, we can From c564abcbf049e5251f9cc25bf0e339956279649d Mon Sep 17 00:00:00 2001 From: Amanda Hernando <110099762+amanda-her@users.noreply.github.com> Date: Thu, 12 Oct 2023 20:38:42 +0200 Subject: [PATCH 124/156] feat(auth): add group membership field resolver provider (#8846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Pertíñez Co-authored-by: Adrián Pertíñez --- .../authorization/AuthorizationUtils.java | 8 +- .../dataset/DatasetStatsSummaryResolver.java | 4 +- .../dataset/DatasetUsageStatsResolver.java | 4 +- .../load/TimeSeriesAspectResolver.java | 4 +- .../policy/GetGrantedPrivilegesResolver.java | 6 +- .../resolvers/glossary/GlossaryUtilsTest.java | 36 +-- .../query/CreateQueryResolverTest.java | 6 +- .../query/DeleteQueryResolverTest.java | 6 +- .../query/UpdateQueryResolverTest.java | 10 +- .../com/datahub/authorization/AuthUtil.java | 10 +- .../authorization/AuthorizationRequest.java | 2 +- .../authorization/AuthorizerContext.java | 4 +- .../authorization/EntityFieldType.java | 31 ++ .../com/datahub/authorization/EntitySpec.java | 23 ++ .../authorization/EntitySpecResolver.java | 11 + .../datahub/authorization/FieldResolver.java | 6 +- .../authorization/ResolvedEntitySpec.java | 66 ++++ .../authorization/ResolvedResourceSpec.java | 55 ---- .../authorization/ResourceFieldType.java | 27 -- .../datahub/authorization/ResourceSpec.java | 23 -- .../authorization/ResourceSpecResolver.java | 11 - .../auth/authorization/Authorizer.java | 4 +- .../authorization/AuthorizerChain.java | 2 +- .../authorization/DataHubAuthorizer.java | 42 ++- ...er.java => DefaultEntitySpecResolver.java} | 33 +- .../datahub/authorization/FilterUtils.java | 8 +- .../datahub/authorization/PolicyEngine.java | 206 +++++------- ...PlatformInstanceFieldResolverProvider.java | 28 +- .../DomainFieldResolverProvider.java | 20 +- .../EntityFieldResolverProvider.java | 22 ++ .../EntityTypeFieldResolverProvider.java | 16 +- .../EntityUrnFieldResolverProvider.java | 16 +- .../GroupMembershipFieldResolverProvider.java | 78 +++++ .../OwnerFieldResolverProvider.java | 20 +- .../ResourceFieldResolverProvider.java | 22 -- .../authorization/DataHubAuthorizerTest.java | 22 +- .../authorization/PolicyEngineTest.java | 304 ++++++++---------- 
...formInstanceFieldResolverProviderTest.java | 37 ++- ...upMembershipFieldResolverProviderTest.java | 212 ++++++++++++ .../factory/auth/AuthorizerChainFactory.java | 14 +- .../delegates/EntityApiDelegateImpl.java | 9 +- .../openapi/entities/EntitiesController.java | 10 +- .../RelationshipsController.java | 6 +- .../openapi/timeline/TimelineController.java | 4 +- .../openapi/util/MappingUtil.java | 11 +- .../datahub/plugins/test/TestAuthorizer.java | 4 +- .../resources/entity/AspectResource.java | 13 +- .../entity/BatchIngestionRunResource.java | 6 +- .../resources/entity/EntityResource.java | 54 ++-- .../resources/entity/EntityV2Resource.java | 8 +- .../entity/EntityVersionedV2Resource.java | 6 +- .../resources/lineage/Relationships.java | 8 +- .../metadata/resources/operations/Utils.java | 6 +- .../resources/platform/PlatformResource.java | 4 +- .../resources/restli/RestliUtils.java | 6 +- .../metadata/resources/usage/UsageStats.java | 8 +- 56 files changed, 937 insertions(+), 685 deletions(-) create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java create mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java delete mode 100644 metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java rename metadata-service/auth-impl/src/main/java/com/datahub/authorization/{DefaultResourceSpecResolver.java => DefaultEntitySpecResolver.java} (51%) create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java delete mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java create mode 100644 metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 3089b8c8fc2db..03e63c7fb472f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -4,7 +4,7 @@ import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -90,7 +90,7 @@ public static boolean canManageTags(@Nonnull QueryContext context) 
{ } public static boolean canDeleteEntity(@Nonnull Urn entityUrn, @Nonnull QueryContext context) { - return isAuthorized(context, Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); + return isAuthorized(context, Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); } public static boolean canManageUserCredentials(@Nonnull QueryContext context) { @@ -173,7 +173,7 @@ public static boolean canDeleteQuery(@Nonnull Urn entityUrn, @Nonnull List public static boolean isAuthorized( @Nonnull QueryContext context, - @Nonnull Optional resourceSpec, + @Nonnull Optional resourceSpec, @Nonnull PoliciesConfig.Privilege privilege) { final Authorizer authorizer = context.getAuthorizer(); final String actor = context.getActorUrn(); @@ -196,7 +196,7 @@ public static boolean isAuthorized( @Nonnull String resource, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { - final ResourceSpec resourceSpec = new ResourceSpec(resourceType, resource); + final EntitySpec resourceSpec = new EntitySpec(resourceType, resource); return AuthUtil.isAuthorized(authorizer, actor, Optional.of(resourceSpec), privilegeGroup); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index 23be49c7e7140..2873866bb34f7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.linkedin.common.urn.Urn; @@ -104,7 +104,7 @@ private CorpUser createPartialUser(final Urn userUrn) { private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java index 20361830ad5a5..e4bec8e896fdf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.dataset; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -52,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environme private boolean isAuthorized(final Urn resourceUrn, final QueryContext context) { return AuthorizationUtils.isAuthorized(context, - Optional.of(new 
ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString())), + Optional.of(new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 197ca8640559d..f13ebf8373e91 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -1,6 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.load; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.Entity; @@ -79,7 +79,7 @@ public TimeSeriesAspectResolver( private boolean isAuthorized(QueryContext context, String urn) { if (_entityName.equals(Constants.DATASET_ENTITY_NAME) && _aspectName.equals( Constants.DATASET_PROFILE_ASPECT_NAME)) { - return AuthorizationUtils.isAuthorized(context, Optional.of(new ResourceSpec(_entityName, urn)), + return AuthorizationUtils.isAuthorized(context, Optional.of(new EntitySpec(_entityName, urn)), PoliciesConfig.VIEW_DATASET_PROFILE_PRIVILEGE); } return true; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java index 2f20fdaf1e9b1..11f7793db82c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetGrantedPrivilegesInput; @@ -33,8 +33,8 @@ public CompletableFuture get(final DataFetchingEnvironment environme if (!isAuthorized(context, actor)) { throw new AuthorizationException("Unauthorized to get privileges for the given author."); } - final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) - .map(spec -> new ResourceSpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); + final Optional resourceSpec = Optional.ofNullable(input.getResourceSpec()) + .map(spec -> new EntitySpec(EntityTypeMapper.getName(spec.getResourceType()), spec.getResourceUrn())); if (context.getAuthorizer() instanceof AuthorizerChain) { DataHubAuthorizer dataHubAuthorizer = ((AuthorizerChain) context.getAuthorizer()).getDefaultAuthorizer(); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java index ccaab44f60dd4..8bfc32e1999ae 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java @@ -5,7 +5,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.GlossaryNodeUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -89,17 +89,17 @@ private void setUpTests() throws Exception { Mockito.any(Authentication.class) )).thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode3Aspects))); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); } - private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, ResourceSpec resourceSpec) { + private void mockAuthRequest(String privilege, AuthorizationResult.Type allowOrDeny, EntitySpec resourceSpec) { final AuthorizationRequest authorizationRequest = new AuthorizationRequest( userUrn, privilege, @@ -150,7 +150,7 @@ public void testCanManageChildrenEntitiesAuthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn, mockClient)); @@ -162,7 +162,7 @@ public void testCanManageChildrenEntitiesUnauthorized() throws Exception { // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); + final EntitySpec resourceSpec = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn.toString()); mockAuthRequest("MANAGE_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec); @@ -175,13 +175,13 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorized() throws Exceptio // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", 
AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -193,13 +193,13 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorized() throws Except // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -211,10 +211,10 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorizedLevel2() throws Ex // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.ALLOW, resourceSpec2); - final ResourceSpec resourceSpec1 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); + final EntitySpec resourceSpec1 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn1.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec1); assertTrue(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn1, mockClient)); @@ -226,10 +226,10 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorizedLevel2() throws // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", 
AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); - final ResourceSpec resourceSpec2 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); + final EntitySpec resourceSpec2 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn2.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec2); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn2, mockClient)); @@ -241,7 +241,7 @@ public void testCanManageChildrenRecursivelyEntitiesNoLevel2() throws Exception // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); - final ResourceSpec resourceSpec3 = new ResourceSpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); + final EntitySpec resourceSpec3 = new EntitySpec(parentNodeUrn.getEntityType(), parentNodeUrn3.toString()); mockAuthRequest("MANAGE_ALL_GLOSSARY_CHILDREN", AuthorizationResult.Type.DENY, resourceSpec3); assertFalse(GlossaryUtils.canManageChildrenEntities(mockContext, parentNodeUrn3, mockClient)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java index 196eb24b52bf8..9c04c67dd3a3b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -201,7 +201,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -210,7 +210,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java index a6b4887b0e882..78c894f27cbc3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import 
com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -134,7 +134,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { DeleteQueryResolverTest.TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( DeleteQueryResolverTest.TEST_DATASET_URN.getEntityType(), DeleteQueryResolverTest.TEST_DATASET_URN.toString())) ); @@ -143,7 +143,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java index 7a76b6d6be5a4..9b500b5fb3936 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -206,7 +206,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -215,7 +215,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())) ); @@ -224,7 +224,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); @@ -233,7 +233,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { TEST_ACTOR_URN.toString(), PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), Optional.of( - new ResourceSpec( + new EntitySpec( TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())) ); diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java index dfb936c61ee0c..e159993a8a243 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java @@ -11,7 +11,7 @@ public class AuthUtil { public static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull Optional 
maybeResourceSpec, + @Nonnull Optional maybeResourceSpec, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -27,7 +27,7 @@ public static boolean isAuthorized( public static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, - @Nonnull List> resourceSpecs, + @Nonnull List> resourceSpecs, @Nonnull DisjunctivePrivilegeGroup privilegeGroup ) { for (ConjunctivePrivilegeGroup andPrivilegeGroup : privilegeGroup.getAuthorizedPrivilegeGroups()) { @@ -44,7 +44,7 @@ private static boolean isAuthorized( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull Optional resourceSpec) { + @Nonnull Optional resourceSpec) { // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. @@ -62,11 +62,11 @@ private static boolean isAuthorizedForResources( @Nonnull Authorizer authorizer, @Nonnull String actor, @Nonnull ConjunctivePrivilegeGroup requiredPrivileges, - @Nonnull List> resourceSpecs) { + @Nonnull List> resourceSpecs) { // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. - for (Optional resourceSpec : resourceSpecs) { + for (Optional resourceSpec : resourceSpecs) { final AuthorizationRequest request = new AuthorizationRequest(actor, privilege, resourceSpec); final AuthorizationResult result = authorizer.authorize(request); if (AuthorizationResult.Type.DENY.equals(result.getType())) { diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java index 084a455495551..9e75de3cbf44d 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationRequest.java @@ -21,5 +21,5 @@ public class AuthorizationRequest { * The resource that the user is requesting for, if applicable. If the privilege is a platform privilege * this optional will be empty. */ - Optional resourceSpec; + Optional resourceSpec; } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java index f9940d171d5d4..b79a4fa20c7ea 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java @@ -18,9 +18,9 @@ public class AuthorizerContext { private final Map contextMap; /** - * A utility for resolving a {@link ResourceSpec} to resolved resource field values. + * A utility for resolving an {@link EntitySpec} to resolved entity field values. 
*/ - private ResourceSpecResolver resourceSpecResolver; + private EntitySpecResolver entitySpecResolver; /** * diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java new file mode 100644 index 0000000000000..46763f29a7040 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java @@ -0,0 +1,31 @@ +package com.datahub.authorization; + +/** + * List of entity field types to fetch for a given entity + */ +public enum EntityFieldType { + /** + * Type of the entity (e.g. dataset, chart) + */ + TYPE, + /** + * Urn of the entity + */ + URN, + /** + * Owners of the entity + */ + OWNER, + /** + * Domains of the entity + */ + DOMAIN, + /** + * Groups of which the entity (only applies to corpUser) is a member + */ + GROUP_MEMBERSHIP, + /** + * Data platform instance of resource + */ + DATA_PLATFORM_INSTANCE +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java new file mode 100644 index 0000000000000..656bec0f44fc2 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpec.java @@ -0,0 +1,23 @@ +package com.datahub.authorization; + +import javax.annotation.Nonnull; +import lombok.Value; + + +/** + * Details about the entities involved in the authorization process. It models the actor and the resource being acted + * upon. Resource types currently supported can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} + */ +@Value +public class EntitySpec { + /** + * The entity type. (dataset, chart, dashboard, corpGroup, etc). + */ + @Nonnull + String type; + /** + * The entity identity. Most often, this corresponds to the raw entity urn. (urn:li:corpGroup:groupId) + */ + @Nonnull + String entity; +} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java new file mode 100644 index 0000000000000..67347fbf87a87 --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntitySpecResolver.java @@ -0,0 +1,11 @@ +package com.datahub.authorization; + +/** + * An Entity Spec Resolver is responsible for resolving a {@link EntitySpec} to a {@link ResolvedEntitySpec}. + */ +public interface EntitySpecResolver { + /** + Resolve a {@link EntitySpec} to a resolved entity spec. 
+   **/
+  ResolvedEntitySpec resolve(EntitySpec entitySpec);
+}
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java
index 9318f5f8e7b96..955a06fd54cb9 100644
--- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java
+++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/FieldResolver.java
@@ -33,9 +33,9 @@ public static FieldResolver getResolverFromValues(Set<String> values) {
   /**
    * Helper function that returns FieldResolver given a fetchFieldValue function
    */
-  public static FieldResolver getResolverFromFunction(ResourceSpec resourceSpec,
-      Function<ResourceSpec, FieldValue> fetchFieldValue) {
-    return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(resourceSpec)));
+  public static FieldResolver getResolverFromFunction(EntitySpec entitySpec,
+      Function<EntitySpec, FieldValue> fetchFieldValue) {
+    return new FieldResolver(() -> CompletableFuture.supplyAsync(() -> fetchFieldValue.apply(entitySpec)));
   }
 
   public static FieldValue emptyFieldValue() {
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java
new file mode 100644
index 0000000000000..7948766df5715
--- /dev/null
+++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedEntitySpec.java
@@ -0,0 +1,66 @@
+package com.datahub.authorization;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import javax.annotation.Nullable;
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+
+
+/**
+ * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type
+ */
+@RequiredArgsConstructor
+@ToString
+public class ResolvedEntitySpec {
+  @Getter
+  private final EntitySpec spec;
+  private final Map<EntityFieldType, FieldResolver> fieldResolvers;
+
+  public Set<String> getFieldValues(EntityFieldType entityFieldType) {
+    if (!fieldResolvers.containsKey(entityFieldType)) {
+      return Collections.emptySet();
+    }
+    return fieldResolvers.get(entityFieldType).getFieldValuesFuture().join().getValues();
+  }
+
+  /**
+   * Fetch the owners for an entity.
+   * @return a set of owner urns, or empty set if none exist.
+   */
+  public Set<String> getOwners() {
+    if (!fieldResolvers.containsKey(EntityFieldType.OWNER)) {
+      return Collections.emptySet();
+    }
+    return fieldResolvers.get(EntityFieldType.OWNER).getFieldValuesFuture().join().getValues();
+  }
+
+  /**
+   * Fetch the platform instance for a Resolved Resource Spec
+   * @return a Platform Instance or null if one does not exist.
+   */
+  @Nullable
+  public String getDataPlatformInstance() {
+    if (!fieldResolvers.containsKey(EntityFieldType.DATA_PLATFORM_INSTANCE)) {
+      return null;
+    }
+    Set<String> dataPlatformInstance = fieldResolvers.get(EntityFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues();
+    if (dataPlatformInstance.size() > 0) {
+      return dataPlatformInstance.stream().findFirst().get();
+    }
+    return null;
+  }
+
+  /**
+   * Fetch the group membership for an entity.
+   * @return a set of groups urns, or empty set if none exist.
+ */ + public Set getGroupMembership() { + if (!fieldResolvers.containsKey(EntityFieldType.GROUP_MEMBERSHIP)) { + return Collections.emptySet(); + } + return fieldResolvers.get(EntityFieldType.GROUP_MEMBERSHIP).getFieldValuesFuture().join().getValues(); + } +} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java deleted file mode 100644 index 8e429a8ca1b94..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResolvedResourceSpec.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.datahub.authorization; - -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import javax.annotation.Nullable; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; - - -/** - * Wrapper around authorization request with field resolvers for lazily fetching the field values for each field type - */ -@RequiredArgsConstructor -@ToString -public class ResolvedResourceSpec { - @Getter - private final ResourceSpec spec; - private final Map fieldResolvers; - - public Set getFieldValues(ResourceFieldType resourceFieldType) { - if (!fieldResolvers.containsKey(resourceFieldType)) { - return Collections.emptySet(); - } - return fieldResolvers.get(resourceFieldType).getFieldValuesFuture().join().getValues(); - } - - /** - * Fetch the owners for a resource. - * @return a set of owner urns, or empty set if none exist. - */ - public Set getOwners() { - if (!fieldResolvers.containsKey(ResourceFieldType.OWNER)) { - return Collections.emptySet(); - } - return fieldResolvers.get(ResourceFieldType.OWNER).getFieldValuesFuture().join().getValues(); - } - - /** - * Fetch the platform instance for a Resolved Resource Spec - * @return a Platform Instance or null if one does not exist. - */ - @Nullable - public String getDataPlatformInstance() { - if (!fieldResolvers.containsKey(ResourceFieldType.DATA_PLATFORM_INSTANCE)) { - return null; - } - Set dataPlatformInstance = fieldResolvers.get(ResourceFieldType.DATA_PLATFORM_INSTANCE).getFieldValuesFuture().join().getValues(); - if (dataPlatformInstance.size() > 0) { - return dataPlatformInstance.stream().findFirst().get(); - } - return null; - } -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java deleted file mode 100644 index 478522dc7c331..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceFieldType.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.datahub.authorization; - -/** - * List of resource field types to fetch for a given resource - */ -public enum ResourceFieldType { - /** - * Type of resource (e.g. 
dataset, chart) - */ - RESOURCE_TYPE, - /** - * Urn of resource - */ - RESOURCE_URN, - /** - * Owners of resource - */ - OWNER, - /** - * Domains of resource - */ - DOMAIN, - /** - * Data platform instance of resource - */ - DATA_PLATFORM_INSTANCE -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java deleted file mode 100644 index c1bd53e31fe29..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpec.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.datahub.authorization; - -import javax.annotation.Nonnull; -import lombok.Value; - - -/** - * Details about a specific resource being acted upon. Resource types currently supported - * can be found inside of {@link com.linkedin.metadata.authorization.PoliciesConfig} - */ -@Value -public class ResourceSpec { - /** - * The resource type. Most often, this corresponds to the entity type. (dataset, chart, dashboard, corpGroup, etc). - */ - @Nonnull - String type; - /** - * The resource identity. Most often, this corresponds to the raw entity urn. (urn:li:corpGroup:groupId) - */ - @Nonnull - String resource; -} \ No newline at end of file diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java deleted file mode 100644 index 05c35f377b9a9..0000000000000 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/ResourceSpecResolver.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datahub.authorization; - -/** - * A Resource Spec Resolver is responsible for resolving a {@link ResourceSpec} to a {@link ResolvedResourceSpec}. - */ -public interface ResourceSpecResolver { - /** - Resolve a {@link ResourceSpec} to a resolved resource spec. 
- **/ - ResolvedResourceSpec resolve(ResourceSpec resourceSpec); -} diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java index ce7a3f22b3147..c731a3ec987c1 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/plugins/auth/authorization/Authorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.Plugin; import java.util.Map; import java.util.Optional; @@ -32,5 +32,5 @@ public interface Authorizer extends Plugin { * Retrieves the current list of actors authorized to for a particular privilege against * an optional resource */ - AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); + AuthorizedActors authorizedActors(final String privilege, final Optional resourceSpec); } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java index d62c37160f816..f8eca541e1efb 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java @@ -82,7 +82,7 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request } @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { if (this.authorizers.isEmpty()) { return null; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index f653ccf72cf54..4553139e3ca54 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -8,6 +8,8 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.policy.DataHubPolicyInfo; + +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -55,7 +57,7 @@ public enum AuthorizationMode { private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1); private final PolicyRefreshRunnable _policyRefreshRunnable; private final PolicyEngine _policyEngine; - private ResourceSpecResolver _resourceSpecResolver; + private EntitySpecResolver _entitySpecResolver; private AuthorizationMode _mode; public static final String ALL = "ALL"; @@ -76,7 +78,7 @@ public DataHubAuthorizer( @Override public void init(@Nonnull Map authorizerConfig, @Nonnull AuthorizerContext ctx) { // Pass. No static config. 
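// [Illustrative sketch, not part of this patch] The hunks above change the public Authorizer
// plugin surface from ResourceSpec to EntitySpec. A minimal custom authorizer written against
// the renamed API might look roughly like the following; the class name and the hard-coded urn
// are hypothetical, and actor enumeration is deliberately elided.
import com.datahub.authorization.AuthorizationRequest;
import com.datahub.authorization.AuthorizationResult;
import com.datahub.authorization.AuthorizedActors;
import com.datahub.authorization.AuthorizerContext;
import com.datahub.authorization.EntitySpec;
import com.datahub.plugins.auth.authorization.Authorizer;
import java.util.Map;
import java.util.Optional;

public class DenyOneDatasetAuthorizer implements Authorizer {

  // Hypothetical urn used only to make the sketch concrete.
  private static final String BLOCKED_URN = "urn:li:dataset:example";

  @Override
  public void init(Map<String, Object> authorizerConfig, AuthorizerContext ctx) {
    // No static config needed for this sketch.
  }

  @Override
  public AuthorizationResult authorize(AuthorizationRequest request) {
    // The request now carries an Optional<EntitySpec> rather than Optional<ResourceSpec>.
    Optional<EntitySpec> spec = request.getResourceSpec();
    if (spec.isPresent() && BLOCKED_URN.equals(spec.get().getEntity())) {
      return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null);
    }
    return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, null);
  }

  @Override
  public AuthorizedActors authorizedActors(String privilege, Optional<EntitySpec> resourceSpec) {
    // Actor enumeration is out of scope for this sketch.
    return null;
  }
}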
- _resourceSpecResolver = Objects.requireNonNull(ctx.getResourceSpecResolver()); + _entitySpecResolver = Objects.requireNonNull(ctx.getEntitySpecResolver()); } public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request) { @@ -86,7 +88,7 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, null); } - Optional resolvedResourceSpec = request.getResourceSpec().map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve); // 1. Fetch the policies relevant to the requested privilege. final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); @@ -102,14 +104,17 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); } - public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { + public List getGrantedPrivileges(final String actor, final Optional resourceSpec) { // 1. Fetch all policies final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + Urn actorUrn = UrnUtils.getUrn(actor); + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor)); + + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - return _policyEngine.getGrantedPrivileges(policiesToEvaluate, UrnUtils.getUrn(actorUrn), resolvedResourceSpec); + return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec); } /** @@ -118,11 +123,11 @@ public List getGrantedPrivileges(final String actorUrn, final Optional resourceSpec) { + final Optional resourceSpec) { // Step 1: Find policies granting the privilege. final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - Optional resolvedResourceSpec = resourceSpec.map(_resourceSpecResolver::resolve); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); final List authorizedUsers = new ArrayList<>(); final List authorizedGroups = new ArrayList<>(); @@ -180,19 +185,36 @@ private boolean isSystemRequest(final AuthorizationRequest request, final Authen /** * Returns true if a policy grants the requested privilege for a given actor and resource. 
*/ - private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { + private boolean isRequestGranted(final DataHubPolicyInfo policy, final AuthorizationRequest request, final Optional resourceSpec) { if (AuthorizationMode.ALLOW_ALL.equals(mode())) { return true; } + + Optional actorUrn = getUrnFromRequestActor(request.getActorUrn()); + if (actorUrn.isEmpty()) { + return false; + } + + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve( + new EntitySpec(actorUrn.get().getEntityType(), request.getActorUrn())); final PolicyEngine.PolicyEvaluationResult result = _policyEngine.evaluatePolicy( policy, - request.getActorUrn(), + resolvedActorSpec, request.getPrivilege(), resourceSpec ); return result.isGranted(); } + private Optional getUrnFromRequestActor(String actor) { + try { + return Optional.of(Urn.createFromString(actor)); + } catch (URISyntaxException e) { + log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actor)); + return Optional.empty(); + } + } + /** * A {@link Runnable} used to periodically fetch a new instance of the policies Cache. * diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java similarity index 51% rename from metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java rename to metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java index 64c43dc8aa591..4ad14ed59c9c0 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java @@ -1,39 +1,40 @@ package com.datahub.authorization; -import com.datahub.authentication.Authentication; import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; +import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.GroupMembershipFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; - import java.util.List; import java.util.Map; import java.util.stream.Collectors; -public class DefaultResourceSpecResolver implements ResourceSpecResolver { - private final List _resourceFieldResolverProviders; +public class DefaultEntitySpecResolver implements EntitySpecResolver { + private final List _entityFieldResolverProviders; - public DefaultResourceSpecResolver(Authentication systemAuthentication, EntityClient entityClient) { - _resourceFieldResolverProviders = + public 
DefaultEntitySpecResolver(Authentication systemAuthentication, EntityClient entityClient) { + _entityFieldResolverProviders = ImmutableList.of(new EntityTypeFieldResolverProvider(), new EntityUrnFieldResolverProvider(), new DomainFieldResolverProvider(entityClient, systemAuthentication), new OwnerFieldResolverProvider(entityClient, systemAuthentication), - new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication)); + new DataPlatformInstanceFieldResolverProvider(entityClient, systemAuthentication), + new GroupMembershipFieldResolverProvider(entityClient, systemAuthentication)); } @Override - public ResolvedResourceSpec resolve(ResourceSpec resourceSpec) { - return new ResolvedResourceSpec(resourceSpec, getFieldResolvers(resourceSpec)); + public ResolvedEntitySpec resolve(EntitySpec entitySpec) { + return new ResolvedEntitySpec(entitySpec, getFieldResolvers(entitySpec)); } - private Map getFieldResolvers(ResourceSpec resourceSpec) { - return _resourceFieldResolverProviders.stream() - .collect(Collectors.toMap(ResourceFieldResolverProvider::getFieldType, - hydrator -> hydrator.getFieldResolver(resourceSpec))); + private Map getFieldResolvers(EntitySpec entitySpec) { + return _entityFieldResolverProviders.stream() + .collect(Collectors.toMap(EntityFieldResolverProvider::getFieldType, + hydrator -> hydrator.getFieldResolver(entitySpec))); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java index 76ed18e2baf78..0dbb9cd132f8a 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/FilterUtils.java @@ -26,7 +26,7 @@ private FilterUtils() { * Creates new PolicyMatchCriterion with field and value, using EQUAL PolicyMatchCondition. */ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values) { + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values) { return newCriterion(field, values, PolicyMatchCondition.EQUALS); } @@ -34,7 +34,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchCriterion with field, value and PolicyMatchCondition. */ @Nonnull - public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field, @Nonnull List values, + public static PolicyMatchCriterion newCriterion(@Nonnull EntityFieldType field, @Nonnull List values, @Nonnull PolicyMatchCondition policyMatchCondition) { return new PolicyMatchCriterion().setField(field.name()) .setValues(new StringArray(values)) @@ -45,7 +45,7 @@ public static PolicyMatchCriterion newCriterion(@Nonnull ResourceFieldType field * Creates new PolicyMatchFilter from a map of Criteria by removing null-valued Criteria and using EQUAL PolicyMatchCondition (default). 
*/ @Nonnull - public static PolicyMatchFilter newFilter(@Nullable Map> params) { + public static PolicyMatchFilter newFilter(@Nullable Map> params) { if (params == null) { return EMPTY_FILTER; } @@ -61,7 +61,7 @@ public static PolicyMatchFilter newFilter(@Nullable Map values) { + public static PolicyMatchFilter newFilter(@Nonnull EntityFieldType field, @Nonnull List values) { return newFilter(Collections.singletonMap(field, values)); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java index 6a36fac7de4e0..f8c017ea74e1f 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java @@ -1,7 +1,6 @@ package com.datahub.authorization; import com.datahub.authentication.Authentication; -import com.google.common.collect.ImmutableSet; import com.linkedin.common.Owner; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; @@ -11,8 +10,6 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.GroupMembership; -import com.linkedin.identity.NativeGroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -23,7 +20,7 @@ import com.linkedin.policy.PolicyMatchCriterion; import com.linkedin.policy.PolicyMatchCriterionArray; import com.linkedin.policy.PolicyMatchFilter; -import java.net.URISyntaxException; + import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -34,6 +31,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -49,37 +47,22 @@ public class PolicyEngine { public PolicyEvaluationResult evaluatePolicy( final DataHubPolicyInfo policy, - final String actorStr, + final ResolvedEntitySpec resolvedActorSpec, final String privilege, - final Optional resource) { - try { - // Currently Actor must be an urn. Consider whether this contract should be pushed up. - final Urn actor = Urn.createFromString(actorStr); - return evaluatePolicy(policy, actor, privilege, resource); - } catch (URISyntaxException e) { - log.error(String.format("Failed to bind actor %s to an URN. Actors must be URNs. Denying the authorization request", actorStr)); - return PolicyEvaluationResult.DENIED; - } - } - - public PolicyEvaluationResult evaluatePolicy( - final DataHubPolicyInfo policy, - final Urn actor, - final String privilege, - final Optional resource) { + final Optional resource) { final PolicyEvaluationContext context = new PolicyEvaluationContext(); log.debug("Evaluating policy {}", policy.getDisplayName()); // If the privilege is not in scope, deny the request. 
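// [Illustrative sketch, not part of this patch] The FilterUtils changes above swap the criterion
// key type to EntityFieldType (TYPE/URN instead of RESOURCE_TYPE/RESOURCE_URN). A policy resource
// filter could then be assembled roughly as follows; the domain urn and the helper class are
// hypothetical, for illustration only.
import com.datahub.authorization.EntityFieldType;
import com.datahub.authorization.FilterUtils;
import com.google.common.collect.ImmutableMap;
import com.linkedin.policy.DataHubResourceFilter;
import com.linkedin.policy.PolicyMatchFilter;
import java.util.Collections;
import java.util.List;
import java.util.Map;

final class ResourceFilterSketch {
  static DataHubResourceFilter datasetInDomainFilter() {
    // Match datasets that belong to a (hypothetical) engineering domain.
    Map<EntityFieldType, List<String>> criteria = ImmutableMap.of(
        EntityFieldType.TYPE, Collections.singletonList("dataset"),
        EntityFieldType.DOMAIN, Collections.singletonList("urn:li:domain:engineering"));
    PolicyMatchFilter filter = FilterUtils.newFilter(criteria);
    DataHubResourceFilter resourceFilter = new DataHubResourceFilter();
    resourceFilter.setFilter(filter);
    return resourceFilter;
  }
}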
- if (!isPrivilegeMatch(privilege, policy.getPrivileges(), context)) { + if (!isPrivilegeMatch(privilege, policy.getPrivileges())) { log.debug("Policy denied based on irrelevant privileges {} for {}", policy.getPrivileges(), privilege); return PolicyEvaluationResult.DENIED; } // If policy is not applicable, deny the request - if (!isPolicyApplicable(policy, actor, resource, context)) { - log.debug("Policy does not applicable for actor {} and resource {}", actor, resource); + if (!isPolicyApplicable(policy, resolvedActorSpec, resource, context)) { + log.debug("Policy does not applicable for actor {} and resource {}", resolvedActorSpec.getSpec().getEntity(), resource); return PolicyEvaluationResult.DENIED; } @@ -89,7 +72,7 @@ public PolicyEvaluationResult evaluatePolicy( public PolicyActors getMatchingActors( final DataHubPolicyInfo policy, - final Optional resource) { + final Optional resource) { final List users = new ArrayList<>(); final List groups = new ArrayList<>(); boolean allUsers = false; @@ -126,8 +109,8 @@ public PolicyActors getMatchingActors( private boolean isPolicyApplicable( final DataHubPolicyInfo policy, - final Urn actor, - final Optional resource, + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource, final PolicyEvaluationContext context ) { @@ -137,25 +120,21 @@ private boolean isPolicyApplicable( } // If the resource is not in scope, deny the request. - if (!isResourceMatch(policy.getType(), policy.getResources(), resource, context)) { + if (!isResourceMatch(policy.getType(), policy.getResources(), resource)) { return false; } // If the actor does not match, deny the request. - if (!isActorMatch(actor, policy.getActors(), resource, context)) { - return false; - } - - return true; + return isActorMatch(resolvedActorSpec, policy.getActors(), resource, context); } public List getGrantedPrivileges( final List policies, - final Urn actor, - final Optional resource) { + final ResolvedEntitySpec resolvedActorSpec, + final Optional resource) { PolicyEvaluationContext context = new PolicyEvaluationContext(); return policies.stream() - .filter(policy -> isPolicyApplicable(policy, actor, resource, context)) + .filter(policy -> isPolicyApplicable(policy, resolvedActorSpec, resource, context)) .flatMap(policy -> policy.getPrivileges().stream()) .distinct() .collect(Collectors.toList()); @@ -168,9 +147,8 @@ public List getGrantedPrivileges( * If the policy is of type "METADATA", the resourceSpec parameter will be matched against the * resource filter defined on the policy. 
*/ - public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { - return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec, - new PolicyEvaluationContext()); + public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optional resourceSpec) { + return isResourceMatch(policy.getType(), policy.getResources(), resourceSpec); } /** @@ -178,8 +156,7 @@ public Boolean policyMatchesResource(final DataHubPolicyInfo policy, final Optio */ private boolean isPrivilegeMatch( final String requestPrivilege, - final List policyPrivileges, - final PolicyEvaluationContext context) { + final List policyPrivileges) { return policyPrivileges.contains(requestPrivilege); } @@ -189,8 +166,7 @@ private boolean isPrivilegeMatch( private boolean isResourceMatch( final String policyType, final @Nullable DataHubResourceFilter policyResourceFilter, - final Optional requestResource, - final PolicyEvaluationContext context) { + final Optional requestResource) { if (PoliciesConfig.PLATFORM_POLICY_TYPE.equals(policyType)) { // Currently, platform policies have no associated resource. return true; @@ -199,7 +175,7 @@ private boolean isResourceMatch( // No resource defined on the policy. return true; } - if (!requestResource.isPresent()) { + if (requestResource.isEmpty()) { // Resource filter present in policy, but no resource spec provided. log.debug("Resource filter present in policy, but no resource spec provided."); return false; @@ -218,31 +194,31 @@ private PolicyMatchFilter getFilter(DataHubResourceFilter policyResourceFilter) } PolicyMatchCriterionArray criteria = new PolicyMatchCriterionArray(); if (policyResourceFilter.hasType()) { - criteria.add(new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_TYPE.name()) + criteria.add(new PolicyMatchCriterion().setField(EntityFieldType.TYPE.name()) .setValues(new StringArray(Collections.singletonList(policyResourceFilter.getType())))); } if (policyResourceFilter.hasType() && policyResourceFilter.hasResources() && !policyResourceFilter.isAllResources()) { criteria.add( - new PolicyMatchCriterion().setField(ResourceFieldType.RESOURCE_URN.name()).setValues(policyResourceFilter.getResources())); + new PolicyMatchCriterion().setField(EntityFieldType.URN.name()).setValues(policyResourceFilter.getResources())); } return new PolicyMatchFilter().setCriteria(criteria); } - private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedResourceSpec resource) { + private boolean checkFilter(final PolicyMatchFilter filter, final ResolvedEntitySpec resource) { return filter.getCriteria().stream().allMatch(criterion -> checkCriterion(criterion, resource)); } - private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedResourceSpec resource) { - ResourceFieldType resourceFieldType; + private boolean checkCriterion(final PolicyMatchCriterion criterion, final ResolvedEntitySpec resource) { + EntityFieldType entityFieldType; try { - resourceFieldType = ResourceFieldType.valueOf(criterion.getField().toUpperCase()); + entityFieldType = EntityFieldType.valueOf(criterion.getField().toUpperCase()); } catch (IllegalArgumentException e) { log.error("Unsupported field type {}", criterion.getField()); return false; } - Set fieldValues = resource.getFieldValues(resourceFieldType); + Set fieldValues = resource.getFieldValues(entityFieldType); return criterion.getValues() .stream() .anyMatch(filterValue -> checkCondition(fieldValues, filterValue, criterion.getCondition())); @@ 
-257,46 +233,51 @@ private boolean checkCondition(Set fieldValues, String filterValue, Poli } /** + * Returns true if the actor portion of a DataHub policy matches a the actor being evaluated, false otherwise. * Returns true if the actor portion of a DataHub policy matches a the actor being evaluated, false otherwise. */ private boolean isActorMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional resourceSpec, + final Optional resourceSpec, final PolicyEvaluationContext context) { // 1. If the actor is a matching "User" in the actor filter, return true immediately. - if (isUserMatch(actor, actorFilter)) { + if (isUserMatch(resolvedActorSpec, actorFilter)) { return true; } // 2. If the actor is in a matching "Group" in the actor filter, return true immediately. - if (isGroupMatch(actor, actorFilter, context)) { + if (isGroupMatch(resolvedActorSpec, actorFilter, context)) { return true; } // 3. If the actor is the owner, either directly or indirectly via a group, return true immediately. - if (isOwnerMatch(actor, actorFilter, resourceSpec, context)) { + if (isOwnerMatch(resolvedActorSpec, actorFilter, resourceSpec, context)) { return true; } // 4. If the actor is in a matching "Role" in the actor filter, return true immediately. - return isRoleMatch(actor, actorFilter, context); + return isRoleMatch(resolvedActorSpec, actorFilter, context); } - private boolean isUserMatch(final Urn actor, final DataHubActorFilter actorFilter) { + private boolean isUserMatch(final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter) { // If the actor is a matching "User" in the actor filter, return true immediately. return actorFilter.isAllUsers() || (actorFilter.hasUsers() && Objects.requireNonNull(actorFilter.getUsers()) - .stream() - .anyMatch(user -> user.equals(actor))); + .stream().map(Urn::toString) + .anyMatch(user -> user.equals(resolvedActorSpec.getSpec().getEntity()))); } - private boolean isGroupMatch(final Urn actor, final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { + private boolean isGroupMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, + final PolicyEvaluationContext context) { // If the actor is in a matching "Group" in the actor filter, return true immediately. if (actorFilter.isAllGroups() || actorFilter.hasGroups()) { - final Set groups = resolveGroups(actor, context); - return actorFilter.isAllGroups() || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) - .stream() + final Set groups = resolveGroups(resolvedActorSpec, context); + return (actorFilter.isAllGroups() && !groups.isEmpty()) + || (actorFilter.hasGroups() && Objects.requireNonNull(actorFilter.getGroups()) + .stream().map(Urn::toString) .anyMatch(groups::contains)); } // If there are no groups on the policy, return false for the group match. @@ -304,24 +285,24 @@ private boolean isGroupMatch(final Urn actor, final DataHubActorFilter actorFilt } private boolean isOwnerMatch( - final Urn actor, + final ResolvedEntitySpec resolvedActorSpec, final DataHubActorFilter actorFilter, - final Optional requestResource, + final Optional requestResource, final PolicyEvaluationContext context) { // If the policy does not apply to owners, or there is no resource to own, return false immediately. 
- if (!actorFilter.isResourceOwners() || !requestResource.isPresent()) { + if (!actorFilter.isResourceOwners() || requestResource.isEmpty()) { return false; } List ownershipTypes = actorFilter.getResourceOwnersTypes(); - return isActorOwner(actor, requestResource.get(), ownershipTypes, context); + return isActorOwner(resolvedActorSpec, requestResource.get(), ownershipTypes, context); } - private Set getOwnersForType(ResourceSpec resourceSpec, List ownershipTypes) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private Set getOwnersForType(EntitySpec resourceSpec, List ownershipTypes) { + Urn entityUrn = UrnUtils.getUrn(resourceSpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, - Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); + Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME), _systemAuthentication); if (response == null || !response.getAspects().containsKey(Constants.OWNERSHIP_ASPECT_NAME)) { return Collections.emptySet(); } @@ -338,50 +319,56 @@ private Set getOwnersForType(ResourceSpec resourceSpec, List owners return ownersStream.map(owner -> owner.getOwner().toString()).collect(Collectors.toSet()); } - private boolean isActorOwner(Urn actor, ResolvedResourceSpec resourceSpec, List ownershipTypes, PolicyEvaluationContext context) { + private boolean isActorOwner( + final ResolvedEntitySpec resolvedActorSpec, + ResolvedEntitySpec resourceSpec, List ownershipTypes, + PolicyEvaluationContext context) { Set owners = this.getOwnersForType(resourceSpec.getSpec(), ownershipTypes); - if (isUserOwner(actor, owners)) { - return true; - } - final Set groups = resolveGroups(actor, context); - if (isGroupOwner(groups, owners)) { + if (isUserOwner(resolvedActorSpec, owners)) { return true; } - return false; + final Set groups = resolveGroups(resolvedActorSpec, context); + + return isGroupOwner(groups, owners); } - private boolean isUserOwner(Urn actor, Set owners) { - return owners.contains(actor.toString()); + private boolean isUserOwner(final ResolvedEntitySpec resolvedActorSpec, Set owners) { + return owners.contains(resolvedActorSpec.getSpec().getEntity()); } - private boolean isGroupOwner(Set groups, Set owners) { - return groups.stream().anyMatch(group -> owners.contains(group.toString())); + private boolean isGroupOwner(Set groups, Set owners) { + return groups.stream().anyMatch(owners::contains); } - private boolean isRoleMatch(final Urn actor, final DataHubActorFilter actorFilter, + private boolean isRoleMatch( + final ResolvedEntitySpec resolvedActorSpec, + final DataHubActorFilter actorFilter, final PolicyEvaluationContext context) { // Can immediately return false if the actor filter does not have any roles if (!actorFilter.hasRoles()) { return false; } // If the actor has a matching "Role" in the actor filter, return true immediately. 
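// [Illustrative sketch, not part of this patch] The ownership checks above now compare plain urn
// strings taken from the ResolvedEntitySpec: an actor matches an ownership-based policy if its own
// urn, or any group urn from its GROUP_MEMBERSHIP field, appears among the resource owners. The
// standalone method and urns below are hypothetical and only illustrate that intersection.
import java.util.Set;

final class OwnerMatchSketch {
  static boolean isOwnerMatch(String actorUrn, Set<String> actorGroupUrns, Set<String> ownerUrns) {
    return ownerUrns.contains(actorUrn)
        || actorGroupUrns.stream().anyMatch(ownerUrns::contains);
  }

  public static void main(String[] args) {
    Set<String> owners = Set.of("urn:li:corpGroup:data-platform");
    Set<String> groups = Set.of("urn:li:corpGroup:data-platform");
    // Prints "true": the actor is an owner indirectly, via group membership.
    System.out.println(isOwnerMatch("urn:li:corpuser:jdoe", groups, owners));
  }
}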
- Set actorRoles = resolveRoles(actor, context); + Set actorRoles = resolveRoles(resolvedActorSpec, context); return Objects.requireNonNull(actorFilter.getRoles()) .stream() .anyMatch(actorRoles::contains); } - private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { + private Set resolveRoles(final ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.roles != null) { return context.roles; } + String actor = resolvedActorSpec.getSpec().getEntity(); + Set roles = new HashSet<>(); final EnvelopedAspectMap aspectMap; try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actor); + Urn actorUrn = Urn.createFromString(actor); + final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actorUrn), + Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME), _systemAuthentication).get(actorUrn); if (corpUser == null || !corpUser.hasAspects()) { return roles; } @@ -403,62 +390,25 @@ private Set resolveRoles(Urn actor, PolicyEvaluationContext context) { return roles; } - private Set resolveGroups(Urn actor, PolicyEvaluationContext context) { + private Set resolveGroups(ResolvedEntitySpec resolvedActorSpec, PolicyEvaluationContext context) { if (context.groups != null) { return context.groups; } - Set groups = new HashSet<>(); - final EnvelopedAspectMap aspectMap; - - try { - final EntityResponse corpUser = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, Collections.singleton(actor), - ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication) - .get(actor); - if (corpUser == null || !corpUser.hasAspects()) { - return groups; - } - aspectMap = corpUser.getAspects(); - } catch (Exception e) { - throw new RuntimeException(String.format("Failed to fetch %s and %s for urn %s", GROUP_MEMBERSHIP_ASPECT_NAME, - NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, actor), e); - } - - Optional maybeGroupMembership = resolveGroupMembership(aspectMap); - maybeGroupMembership.ifPresent(groupMembership -> groups.addAll(groupMembership.getGroups())); - - Optional maybeNativeGroupMembership = resolveNativeGroupMembership(aspectMap); - maybeNativeGroupMembership.ifPresent( - nativeGroupMembership -> groups.addAll(nativeGroupMembership.getNativeGroups())); + Set groups = resolvedActorSpec.getGroupMembership(); context.setGroups(groups); // Cache the groups. return groups; } - // TODO: Optimization - Cache the group membership. Refresh periodically. - private Optional resolveGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of(new GroupMembership(aspectMap.get(GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - - private Optional resolveNativeGroupMembership(final EnvelopedAspectMap aspectMap) { - if (aspectMap.containsKey(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { - return Optional.of( - new NativeGroupMembership(aspectMap.get(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME).getValue().data())); - } - return Optional.empty(); - } - /** * Class used to store state across a single Policy evaluation. 
*/ static class PolicyEvaluationContext { - private Set groups; + private Set groups; private Set roles; - public void setGroups(Set groups) { + public void setGroups(Set groups) { this.groups = groups; } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java index cd838625c2ca1..27cb8fcee8138 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -1,45 +1,45 @@ package com.datahub.authorization.fieldresolverprovider; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.client.EntityClient; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; - import java.util.Collections; import java.util.Objects; - -import static com.linkedin.metadata.Constants.*; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; /** * Provides field resolver for domain given resourceSpec */ @Slf4j @RequiredArgsConstructor -public class DataPlatformInstanceFieldResolverProvider implements ResourceFieldResolverProvider { +public class DataPlatformInstanceFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.DATA_PLATFORM_INSTANCE; + public EntityFieldType getFieldType() { + return EntityFieldType.DATA_PLATFORM_INSTANCE; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getDataPlatformInstance); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getDataPlatformInstance); } - private FieldResolver.FieldValue getDataPlatformInstance(ResourceSpec resourceSpec) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getDataPlatformInstance(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); // In the case that the entity is a platform instance, the associated platform instance entity is the instance itself if (entityUrn.getEntityType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME)) { return FieldResolver.FieldValue.builder() diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java 
b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java index 68c1dd4f644e5..25c2165f02b94 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.domain.DomainProperties; @@ -27,23 +27,23 @@ /** - * Provides field resolver for domain given resourceSpec + * Provides field resolver for domain given entitySpec */ @Slf4j @RequiredArgsConstructor -public class DomainFieldResolverProvider implements ResourceFieldResolverProvider { +public class DomainFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.DOMAIN; + public EntityFieldType getFieldType() { + return EntityFieldType.DOMAIN; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getDomains); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getDomains); } private Set getBatchedParentDomains(@Nonnull final Set urns) { @@ -78,8 +78,8 @@ private Set getBatchedParentDomains(@Nonnull final Set urns) { return parentUrns; } - private FieldResolver.FieldValue getDomains(ResourceSpec resourceSpec) { - final Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getDomains(EntitySpec entitySpec) { + final Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); // In the case that the entity is a domain, the associated domain is the domain itself if (entityUrn.getEntityType().equals(DOMAIN_ENTITY_NAME)) { return FieldResolver.FieldValue.builder() diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java new file mode 100644 index 0000000000000..a76db0ecb5102 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java @@ -0,0 +1,22 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; + + +/** + * Base class for defining a class that provides the field resolver for the given field type + */ +public interface EntityFieldResolverProvider { + + /** + * Field that this hydrator is hydrating + */ + EntityFieldType getFieldType(); + + /** + * Return resolver for fetching the field values given the entity + */ + FieldResolver getFieldResolver(EntitySpec entitySpec); +} diff --git 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java index 58e3d78ce8c3b..187f696904947 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java @@ -1,22 +1,22 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import java.util.Collections; /** - * Provides field resolver for entity type given resourceSpec + * Provides field resolver for entity type given entitySpec */ -public class EntityTypeFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityTypeFieldResolverProvider implements EntityFieldResolverProvider { @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_TYPE; + public EntityFieldType getFieldType() { + return EntityFieldType.TYPE; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getType())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getType())); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java index b9d98f1dcbac0..2f5c4a7c6c961 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java @@ -1,22 +1,22 @@ package com.datahub.authorization.fieldresolverprovider; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import java.util.Collections; /** - * Provides field resolver for entity urn given resourceSpec + * Provides field resolver for entity urn given entitySpec */ -public class EntityUrnFieldResolverProvider implements ResourceFieldResolverProvider { +public class EntityUrnFieldResolverProvider implements EntityFieldResolverProvider { @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.RESOURCE_URN; + public EntityFieldType getFieldType() { + return EntityFieldType.URN; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromValues(Collections.singleton(resourceSpec.getResource())); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromValues(Collections.singleton(entitySpec.getEntity())); } } diff --git 
a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java new file mode 100644 index 0000000000000..8db029632d7e2 --- /dev/null +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java @@ -0,0 +1,78 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.FieldResolver; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.metadata.Constants; +import com.linkedin.identity.GroupMembership; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import static com.linkedin.metadata.Constants.GROUP_MEMBERSHIP_ASPECT_NAME; +import static com.linkedin.metadata.Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME; + + +/** + * Provides field resolver for owners given entitySpec + */ +@Slf4j +@RequiredArgsConstructor +public class GroupMembershipFieldResolverProvider implements EntityFieldResolverProvider { + + private final EntityClient _entityClient; + private final Authentication _systemAuthentication; + + @Override + public EntityFieldType getFieldType() { + return EntityFieldType.GROUP_MEMBERSHIP; + } + + @Override + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getGroupMembership); + } + + private FieldResolver.FieldValue getGroupMembership(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); + EnvelopedAspect groupMembershipAspect; + EnvelopedAspect nativeGroupMembershipAspect; + List groups = new ArrayList<>(); + try { + EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, + ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME), _systemAuthentication); + if (response == null + || !(response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME) + || response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME))) { + return FieldResolver.emptyFieldValue(); + } + if (response.getAspects().containsKey(Constants.GROUP_MEMBERSHIP_ASPECT_NAME)) { + groupMembershipAspect = response.getAspects().get(Constants.GROUP_MEMBERSHIP_ASPECT_NAME); + GroupMembership groupMembership = new GroupMembership(groupMembershipAspect.getValue().data()); + groups.addAll(groupMembership.getGroups()); + } + if (response.getAspects().containsKey(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)) { + nativeGroupMembershipAspect = response.getAspects().get(Constants.NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME); + NativeGroupMembership nativeGroupMembership = new NativeGroupMembership(nativeGroupMembershipAspect.getValue().data()); + groups.addAll(nativeGroupMembership.getNativeGroups()); + } + } catch (Exception e) { + log.error("Error while retrieving group membership aspect for 
urn {}", entityUrn, e); + return FieldResolver.emptyFieldValue(); + } + return FieldResolver.FieldValue.builder() + .values(groups.stream().map(Urn::toString).collect(Collectors.toSet())) + .build(); + } +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java index 20ec6a09377c8..bdd652d1d3871 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java @@ -2,8 +2,8 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -18,27 +18,27 @@ /** - * Provides field resolver for owners given resourceSpec + * Provides field resolver for owners given entitySpec */ @Slf4j @RequiredArgsConstructor -public class OwnerFieldResolverProvider implements ResourceFieldResolverProvider { +public class OwnerFieldResolverProvider implements EntityFieldResolverProvider { private final EntityClient _entityClient; private final Authentication _systemAuthentication; @Override - public ResourceFieldType getFieldType() { - return ResourceFieldType.OWNER; + public EntityFieldType getFieldType() { + return EntityFieldType.OWNER; } @Override - public FieldResolver getFieldResolver(ResourceSpec resourceSpec) { - return FieldResolver.getResolverFromFunction(resourceSpec, this::getOwners); + public FieldResolver getFieldResolver(EntitySpec entitySpec) { + return FieldResolver.getResolverFromFunction(entitySpec, this::getOwners); } - private FieldResolver.FieldValue getOwners(ResourceSpec resourceSpec) { - Urn entityUrn = UrnUtils.getUrn(resourceSpec.getResource()); + private FieldResolver.FieldValue getOwners(EntitySpec entitySpec) { + Urn entityUrn = UrnUtils.getUrn(entitySpec.getEntity()); EnvelopedAspect ownershipAspect; try { EntityResponse response = _entityClient.getV2(entityUrn.getEntityType(), entityUrn, diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java deleted file mode 100644 index 4ba4200f8035e..0000000000000 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/ResourceFieldResolverProvider.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.datahub.authorization.fieldresolverprovider; - -import com.datahub.authorization.FieldResolver; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; - - -/** - * Base class for defining a class that provides the field resolver for the given field type - */ -public interface ResourceFieldResolverProvider { - - /** - * Field that this hydrator is hydrating - */ - ResourceFieldType getFieldType(); - - /** - * Return resolver for fetching the field values given the resource - */ - FieldResolver getFieldResolver(ResourceSpec 
resourceSpec); -} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 2e48123fb1813..24ecfa6fefc85 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -158,7 +158,7 @@ public void testSystemAuthentication() throws Exception { // Validate that the System Actor is authorized, even if there is no policy. - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( new Actor(ActorType.USER, DATAHUB_SYSTEM_CLIENT_ID).toUrnStr(), @@ -172,7 +172,7 @@ public void testSystemAuthentication() throws Exception { @Test public void testAuthorizeGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -186,7 +186,7 @@ public void testAuthorizeGranted() throws Exception { @Test public void testAuthorizeNotGranted() throws Exception { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -203,7 +203,7 @@ public void testAllowAllMode() throws Exception { _dataHubAuthorizer.setMode(DataHubAuthorizer.AuthorizationMode.ALLOW_ALL); - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); // Policy for this privilege is inactive. AuthorizationRequest request = new AuthorizationRequest( @@ -219,7 +219,7 @@ public void testAllowAllMode() throws Exception { public void testInvalidateCache() throws Exception { // First make sure that the default policies are as expected. - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -250,7 +250,7 @@ public void testInvalidateCache() throws Exception { public void testAuthorizedActorsActivePolicy() throws Exception { final AuthorizedActors actors = _dataHubAuthorizer.authorizedActors("EDIT_ENTITY_TAGS", // Should be inside the active policy. 
- Optional.of(new ResourceSpec("dataset", "urn:li:dataset:1"))); + Optional.of(new EntitySpec("dataset", "urn:li:dataset:1"))); assertTrue(actors.isAllUsers()); assertTrue(actors.isAllGroups()); @@ -272,7 +272,7 @@ public void testAuthorizedActorsActivePolicy() throws Exception { @Test public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -285,7 +285,7 @@ public void testAuthorizationOnDomainWithPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -298,7 +298,7 @@ public void testAuthorizationOnDomainWithParentPrivilegeIsAllowed() { @Test public void testAuthorizationOnDomainWithoutPrivilegeIsDenied() { - ResourceSpec resourceSpec = new ResourceSpec("dataset", "urn:li:dataset:test"); + EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test"); AuthorizationRequest request = new AuthorizationRequest( "urn:li:corpuser:test", @@ -334,7 +334,7 @@ private DataHubPolicyInfo createDataHubPolicyInfo(boolean active, List p resourceFilter.setType("dataset"); if (domain != null) { - resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(ResourceFieldType.DOMAIN, Collections.singletonList(domain.toString())))); + resourceFilter.setFilter(FilterUtils.newFilter(ImmutableMap.of(EntityFieldType.DOMAIN, Collections.singletonList(domain.toString())))); } dataHubPolicyInfo.setResources(resourceFilter); @@ -398,6 +398,6 @@ private Map createDomainPropertiesBatchResponse(@Nullable f } private AuthorizerContext createAuthorizerContext(final Authentication systemAuthentication, final EntityClient entityClient) { - return new AuthorizerContext(Collections.emptyMap(), new DefaultResourceSpecResolver(systemAuthentication, entityClient)); + return new AuthorizerContext(Collections.emptyMap(), new DefaultEntitySpecResolver(systemAuthentication, entityClient)); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java index 99d8fee309d91..be8c948f8ef89 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java @@ -11,15 +11,12 @@ import com.linkedin.common.OwnershipType; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.identity.CorpUserInfo; -import com.linkedin.identity.GroupMembership; import com.linkedin.identity.RoleMembership; import com.linkedin.metadata.Constants; import com.linkedin.policy.DataHubActorFilter; @@ -45,22 +42,19 @@ public class PolicyEngineTest { private static final String AUTHORIZED_PRINCIPAL = "urn:li:corpuser:datahub"; 
private static final String UNAUTHORIZED_PRINCIPAL = "urn:li:corpuser:unauthorized"; - private static final String AUTHORIZED_GROUP = "urn:li:corpGroup:authorizedGroup"; - private static final String RESOURCE_URN = "urn:li:dataset:test"; - private static final String DOMAIN_URN = "urn:li:domain:domain1"; - private static final String OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__technical_owner"; - private static final String OTHER_OWNERSHIP_TYPE_URN = "urn:li:ownershipType:__system__data_steward"; private EntityClient _entityClient; private PolicyEngine _policyEngine; private Urn authorizedUserUrn; + private ResolvedEntitySpec resolvedAuthorizedUserSpec; private Urn unauthorizedUserUrn; + private ResolvedEntitySpec resolvedUnauthorizedUserSpec; private Urn resourceUrn; @BeforeMethod @@ -68,29 +62,34 @@ public void setupTest() throws Exception { _entityClient = Mockito.mock(EntityClient.class); _policyEngine = new PolicyEngine(Mockito.mock(Authentication.class), _entityClient); - // Init mocks. - EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedUserUrn = Urn.createFromString(AUTHORIZED_PRINCIPAL); + resolvedAuthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, AUTHORIZED_PRINCIPAL, + Collections.emptySet(), Collections.emptySet(), Collections.singleton(AUTHORIZED_GROUP)); + unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); + resolvedUnauthorizedUserSpec = buildEntityResolvers(CORP_USER_ENTITY_NAME, UNAUTHORIZED_PRINCIPAL); + resourceUrn = Urn.createFromString(RESOURCE_URN); + + // Init role membership mocks. + EntityResponse authorizedEntityResponse = createAuthorizedEntityResponse(); authorizedEntityResponse.setUrn(authorizedUserUrn); Map authorizedEntityResponseMap = Collections.singletonMap(authorizedUserUrn, authorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), any(), - any())).thenReturn(authorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(authorizedEntityResponseMap); EntityResponse unauthorizedEntityResponse = createUnauthorizedEntityResponse(); - unauthorizedUserUrn = Urn.createFromString(UNAUTHORIZED_PRINCIPAL); unauthorizedEntityResponse.setUrn(unauthorizedUserUrn); Map unauthorizedEntityResponseMap = Collections.singletonMap(unauthorizedUserUrn, unauthorizedEntityResponse); - when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), any(), - any())).thenReturn(unauthorizedEntityResponseMap); + when(_entityClient.batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(unauthorizedUserUrn)), + eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())).thenReturn(unauthorizedEntityResponseMap); + // Init ownership type mocks. 
EntityResponse entityResponse = new EntityResponse(); EnvelopedAspectMap envelopedAspectMap = new EnvelopedAspectMap(); envelopedAspectMap.put(OWNERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new com.linkedin.entity.Aspect(createOwnershipAspect(true, true).data()))); entityResponse.setAspects(envelopedAspectMap); - resourceUrn = Urn.createFromString(RESOURCE_URN); Map mockMap = mock(Map.class); when(_entityClient.batchGetV2(any(), eq(Collections.singleton(resourceUrn)), eq(Collections.singleton(OWNERSHIP_ASPECT_NAME)), any())).thenReturn(mockMap); @@ -120,9 +119,9 @@ public void testEvaluatePolicyInactivePolicyState() { resourceFilter.setAllResources(true); resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -149,9 +148,9 @@ public void testEvaluatePolicyPrivilegeFilterNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_OWNERS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_OWNERS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -176,7 +175,8 @@ public void testEvaluatePlatformPolicyPrivilegeFilterMatch() throws Exception { dataHubPolicyInfo.setActors(actorFilter); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "MANAGE_POLICIES", Optional.empty()); + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "MANAGE_POLICIES", + Optional.empty()); assertTrue(result.isGranted()); // Verify no network calls @@ -208,10 +208,10 @@ public void testEvaluatePolicyActorFilterUserMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert Authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); @@ -245,10 +245,10 @@ public void testEvaluatePolicyActorFilterUserNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. 
PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, "urn:li:corpuser:test", "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, buildEntityResolvers(CORP_USER_ENTITY_NAME, "urn:li:corpuser:test"), "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); @@ -270,7 +270,7 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -282,16 +282,15 @@ public void testEvaluatePolicyActorFilterGroupMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Verify we are only calling for group during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -307,7 +306,7 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { final DataHubActorFilter actorFilter = new DataHubActorFilter(); final UrnArray groupsUrnArray = new UrnArray(); - groupsUrnArray.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup")); + groupsUrnArray.add(Urn.createFromString(AUTHORIZED_GROUP)); actorFilter.setGroups(groupsUrnArray); actorFilter.setResourceOwners(false); actorFilter.setAllUsers(false); @@ -319,16 +318,15 @@ public void testEvaluatePolicyActorFilterGroupNoMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -357,17 +355,17 @@ public void testEvaluatePolicyActorFilterRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult authorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(authorizedResult.isGranted()); // Verify we are only calling for roles during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), + eq(Collections.singleton(authorizedUserUrn)), any(), any()); } @Test @@ -396,10 +394,10 @@ public void testEvaluatePolicyActorFilterNoRoleMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags. PolicyEngine.PolicyEvaluationResult unauthorizedResult = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(unauthorizedResult.isGranted()); @@ -431,16 +429,16 @@ public void testEvaluatePolicyActorFilterAllUsersMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result2.isGranted()); @@ -470,24 +468,21 @@ public void testEvaluatePolicyActorFilterAllGroupsMatch() throws Exception { resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert authorized user can edit entity tags, because of group membership. 
PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); - assertTrue(result2.isGranted()); + assertFalse(result2.isGranted()); - // Verify we are only calling for group during these requests. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -519,17 +514,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersMatch() throws Except when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure no calls for group membership. 
- verify(_entityClient, times(0)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - eq(null), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -562,13 +557,17 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeMatch() throws Ex when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), + Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -601,13 +600,16 @@ public void testEvaluatePolicyActorFilterUserResourceOwnersTypeNoMatch() throws when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL), Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result1.isGranted()); + + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -639,17 +641,17 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersMatch() throws Excep when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_GROUP), Collections.emptySet(), + Collections.emptySet()); // Assert authorized user can edit entity tags, because he is a user owner. PolicyEngine.PolicyEvaluationResult result1 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result1.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. 
- verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)), - any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -673,16 +675,15 @@ public void testEvaluatePolicyActorFilterGroupResourceOwnersNoMatch() throws Exc resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); // Assert unauthorized user cannot edit entity tags. PolicyEngine.PolicyEvaluationResult result2 = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, UNAUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedUnauthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result2.isGranted()); - // Ensure that caching of groups is working with 1 call to entity client for each principal. - verify(_entityClient, times(1)).batchGetV2(eq(CORP_USER_ENTITY_NAME), - eq(Collections.singleton(unauthorizedUserUrn)), any(), any()); + // Verify no network calls + verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any()); } @Test @@ -706,10 +707,10 @@ public void testEvaluatePolicyResourceFilterAllResourcesMatch() throws Exception resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A dataset Authorized principal _does not own_. PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -738,9 +739,9 @@ public void testEvaluatePolicyResourceFilterAllResourcesNoMatch() throws Excepti resourceFilter.setType("dataset"); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. + ResolvedEntitySpec resourceSpec = buildEntityResolvers("chart", RESOURCE_URN); // Notice: Not a dataset. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -773,9 +774,9 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchLegacy() throws resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -801,13 +802,13 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatch() throws Excep final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN); + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -833,14 +834,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatch() throws Exc final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. 
PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -866,14 +867,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceMatchDomain() throws final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN)); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), Collections.singleton(DOMAIN_URN), Collections.emptySet()); PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertTrue(result.isGranted()); @@ -899,14 +900,14 @@ public void testEvaluatePolicyResourceFilterSpecificResourceNoMatchDomain() thro final DataHubResourceFilter resourceFilter = new DataHubResourceFilter(); resourceFilter.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match PolicyEngine.PolicyEvaluationResult result = - _policyEngine.evaluatePolicy(dataHubPolicyInfo, AUTHORIZED_PRINCIPAL, "EDIT_ENTITY_TAGS", + _policyEngine.evaluatePolicy(dataHubPolicyInfo, resolvedAuthorizedUserSpec, "EDIT_ENTITY_TAGS", Optional.of(resourceSpec)); assertFalse(result.isGranted()); @@ -933,7 +934,7 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter1 = new DataHubResourceFilter(); resourceFilter1.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), ResourceFieldType.DOMAIN, + ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), EntityFieldType.DOMAIN, Collections.singletonList(DOMAIN_URN)))); dataHubPolicyInfo1.setResources(resourceFilter1); @@ -954,8 +955,8 @@ public void testGetGrantedPrivileges() throws Exception { final DataHubResourceFilter resourceFilter2 = new DataHubResourceFilter(); resourceFilter2.setFilter(FilterUtils.newFilter( - ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE, Collections.singletonList("dataset"), - ResourceFieldType.RESOURCE_URN, Collections.singletonList(RESOURCE_URN)))); + 
ImmutableMap.of(EntityFieldType.TYPE, Collections.singletonList("dataset"), + EntityFieldType.URN, Collections.singletonList(RESOURCE_URN)))); dataHubPolicyInfo2.setResources(resourceFilter2); // Policy 3, match dataset type and owner (legacy resource filter) @@ -981,25 +982,25 @@ public void testGetGrantedPrivileges() throws Exception { final List policies = ImmutableList.of(dataHubPolicyInfo1, dataHubPolicyInfo2, dataHubPolicyInfo3); - assertEquals(_policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.empty()), + assertEquals(_policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.empty()), Collections.emptyList()); - ResolvedResourceSpec resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Everything matches + ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Everything matches assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", "PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.emptySet(), - Collections.singleton("urn:li:domain:domain2")); // Domain doesn't match + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.emptySet(), + Collections.singleton("urn:li:domain:domain2"), Collections.emptySet()); // Domain doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_2_1", "PRIVILEGE_2_2")); - resourceSpec = buildResourceResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), - Collections.singleton(DOMAIN_URN)); // Resource doesn't match + resourceSpec = buildEntityResolvers("dataset", "urn:li:dataset:random", Collections.emptySet(), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1")); final EntityResponse entityResponse = new EntityResponse(); @@ -1008,16 +1009,16 @@ public void testGetGrantedPrivileges() throws Exception { entityResponse.setAspects(aspectMap); when(_entityClient.getV2(eq(resourceUrn.getEntityType()), eq(resourceUrn), eq(Collections.singleton(Constants.OWNERSHIP_ASPECT_NAME)), any())).thenReturn(entityResponse); - resourceSpec = buildResourceResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Is owner + resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Is owner assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), ImmutableList.of("PRIVILEGE_1", 
"PRIVILEGE_2_1", "PRIVILEGE_2_2", "PRIVILEGE_3")); - resourceSpec = buildResourceResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), - Collections.singleton(DOMAIN_URN)); // Resource type doesn't match + resourceSpec = buildEntityResolvers("chart", RESOURCE_URN, Collections.singleton(AUTHORIZED_PRINCIPAL), + Collections.singleton(DOMAIN_URN), Collections.emptySet()); // Resource type doesn't match assertEquals( - _policyEngine.getGrantedPrivileges(policies, UrnUtils.getUrn(AUTHORIZED_PRINCIPAL), Optional.of(resourceSpec)), + _policyEngine.getGrantedPrivileges(policies, resolvedAuthorizedUserSpec, Optional.of(resourceSpec)), Collections.emptyList()); } @@ -1050,9 +1051,9 @@ public void testGetMatchingActorsResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), - Collections.emptySet()); + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(AUTHORIZED_PRINCIPAL, AUTHORIZED_GROUP), + Collections.emptySet(), Collections.emptySet()); PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec)); assertTrue(actors.allUsers()); @@ -1101,8 +1102,8 @@ public void testGetMatchingActorsNoResourceMatch() throws Exception { resourceFilter.setResources(resourceUrns); dataHubPolicyInfo.setResources(resourceFilter); - ResolvedResourceSpec resourceSpec = - buildResourceResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. + ResolvedEntitySpec resourceSpec = + buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy. 
    PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec));
    assertFalse(actors.allUsers());
@@ -1155,21 +1156,6 @@ private EntityResponse createAuthorizedEntityResponse() throws URISyntaxExceptio
     final EntityResponse entityResponse = new EntityResponse();
     final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();
 
-    final CorpUserInfo userInfo = new CorpUserInfo();
-    userInfo.setActive(true);
-    userInfo.setFullName("Data Hub");
-    userInfo.setFirstName("Data");
-    userInfo.setLastName("Hub");
-    userInfo.setEmail("datahub@gmail.com");
-    userInfo.setTitle("Admin");
-    aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data())));
-
-    final GroupMembership groupsAspect = new GroupMembership();
-    final UrnArray groups = new UrnArray();
-    groups.add(Urn.createFromString("urn:li:corpGroup:authorizedGroup"));
-    groupsAspect.setGroups(groups);
-    aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(groupsAspect.data())));
-
     final RoleMembership rolesAspect = new RoleMembership();
     final UrnArray roles = new UrnArray();
     roles.add(Urn.createFromString("urn:li:dataHubRole:admin"));
@@ -1184,21 +1170,6 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept
     final EntityResponse entityResponse = new EntityResponse();
     final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();
 
-    final CorpUserInfo userInfo = new CorpUserInfo();
-    userInfo.setActive(true);
-    userInfo.setFullName("Unauthorized User");
-    userInfo.setFirstName("Unauthorized");
-    userInfo.setLastName("User");
-    userInfo.setEmail("Unauth");
-    userInfo.setTitle("Engineer");
-    aspectMap.put(CORP_USER_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(userInfo.data())));
-
-    final GroupMembership groupsAspect = new GroupMembership();
-    final UrnArray groups = new UrnArray();
-    groups.add(Urn.createFromString("urn:li:corpGroup:unauthorizedGroup"));
-    groupsAspect.setGroups(groups);
-    aspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(groupsAspect.data())));
-
     final RoleMembership rolesAspect = new RoleMembership();
     final UrnArray roles = new UrnArray();
     roles.add(Urn.createFromString("urn:li:dataHubRole:reader"));
@@ -1209,17 +1180,18 @@ private EntityResponse createUnauthorizedEntityResponse() throws URISyntaxExcept
     return entityResponse;
   }
 
-  public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn) {
-    return buildResourceResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet());
+  public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn) {
+    return buildEntityResolvers(entityType, entityUrn, Collections.emptySet(), Collections.emptySet(), Collections.emptySet());
   }
 
-  public static ResolvedResourceSpec buildResourceResolvers(String entityType, String entityUrn, Set<String> owners,
-      Set<String> domains) {
-    return new ResolvedResourceSpec(new ResourceSpec(entityType, entityUrn),
-        ImmutableMap.of(ResourceFieldType.RESOURCE_TYPE,
-            FieldResolver.getResolverFromValues(Collections.singleton(entityType)), ResourceFieldType.RESOURCE_URN,
-            FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), ResourceFieldType.OWNER,
-            FieldResolver.getResolverFromValues(owners), ResourceFieldType.DOMAIN,
-            FieldResolver.getResolverFromValues(domains)));
+  public static ResolvedEntitySpec buildEntityResolvers(String entityType, String entityUrn, Set<String> owners,
+      Set<String> domains, Set<String>
groups) { + return new ResolvedEntitySpec(new EntitySpec(entityType, entityUrn), + ImmutableMap.of(EntityFieldType.TYPE, + FieldResolver.getResolverFromValues(Collections.singleton(entityType)), EntityFieldType.URN, + FieldResolver.getResolverFromValues(Collections.singleton(entityUrn)), EntityFieldType.OWNER, + FieldResolver.getResolverFromValues(owners), EntityFieldType.DOMAIN, + FieldResolver.getResolverFromValues(domains), EntityFieldType.GROUP_MEMBERSHIP, + FieldResolver.getResolverFromValues(groups))); } } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java index e525c602c2620..b2343bbb01509 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -1,8 +1,21 @@ package com.datahub.authorization.fieldresolverprovider; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + import com.datahub.authentication.Authentication; -import com.datahub.authorization.ResourceFieldType; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; @@ -11,29 +24,21 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.Set; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import java.net.URISyntaxException; -import java.util.Collections; -import java.util.Set; - -import static com.linkedin.metadata.Constants.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.*; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - public class DataPlatformInstanceFieldResolverProviderTest { private static final String DATA_PLATFORM_INSTANCE_URN = "urn:li:dataPlatformInstance:(urn:li:dataPlatform:s3,test-platform-instance)"; private static final String RESOURCE_URN = "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.testDataset,PROD)"; - private static final ResourceSpec RESOURCE_SPEC = new ResourceSpec(DATASET_ENTITY_NAME, RESOURCE_URN); + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); @Mock private 
EntityClient entityClientMock; @@ -51,12 +56,12 @@ public void setup() { @Test public void shouldReturnDataPlatformInstanceType() { - assertEquals(ResourceFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); } @Test public void shouldReturnFieldValueWithResourceSpecIfTypeIsDataPlatformInstance() { - var resourceSpec = new ResourceSpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); + var resourceSpec = new EntitySpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, DATA_PLATFORM_INSTANCE_URN); var result = dataPlatformInstanceFieldResolverProvider.getFieldResolver(resourceSpec); diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java new file mode 100644 index 0000000000000..54675045b4413 --- /dev/null +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java @@ -0,0 +1,212 @@ +package com.datahub.authorization.fieldresolverprovider; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.EntityFieldType; +import com.datahub.authorization.EntitySpec; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.identity.GroupMembership; +import com.linkedin.identity.NativeGroupMembership; +import com.linkedin.r2.RemoteInvocationException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.util.Set; + +import static com.linkedin.metadata.Constants.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class GroupMembershipFieldResolverProviderTest { + + private static final String CORPGROUP_URN = "urn:li:corpGroup:groupname"; + private static final String NATIVE_CORPGROUP_URN = "urn:li:corpGroup:nativegroupname"; + private static final String RESOURCE_URN = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)"; + private static final EntitySpec RESOURCE_SPEC = new EntitySpec(DATASET_ENTITY_NAME, RESOURCE_URN); + + @Mock + private EntityClient entityClientMock; + @Mock + private Authentication systemAuthenticationMock; + + private GroupMembershipFieldResolverProvider groupMembershipFieldResolverProvider; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + groupMembershipFieldResolverProvider = + new GroupMembershipFieldResolverProvider(entityClientMock, systemAuthenticationMock); + } + + @Test + public void shouldReturnGroupsMembershipType() { + assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldType()); + } + + @Test + public void 
shouldReturnEmptyFieldValueWhenResponseIsNull() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(null); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenResourceDoesNotBelongToAnyGroup() + throws RemoteInvocationException, URISyntaxException { + var entityResponseMock = mock(EntityResponse.class); + when(entityResponseMock.getAspects()).thenReturn(new EnvelopedAspectMap()); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnEmptyFieldValueWhenThereIsAnException() throws RemoteInvocationException, URISyntaxException { + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenThrow(new RemoteInvocationException()); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertTrue(result.getFieldValuesFuture().join().getValues().isEmpty()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithOnlyGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + 
); + } + + @Test + public void shouldReturnFieldValueWithOnlyNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } + + @Test + public void shouldReturnFieldValueWithGroupsAndNativeGroupsOfTheResource() + throws RemoteInvocationException, URISyntaxException { + + var groupMembership = new GroupMembership().setGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(CORPGROUP_URN)))); + var nativeGroupMembership = new NativeGroupMembership().setNativeGroups( + new UrnArray(ImmutableList.of(Urn.createFromString(NATIVE_CORPGROUP_URN)))); + var entityResponseMock = mock(EntityResponse.class); + var envelopedAspectMap = new EnvelopedAspectMap(); + envelopedAspectMap.put(GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(groupMembership.data()))); + envelopedAspectMap.put(NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(nativeGroupMembership.data()))); + when(entityResponseMock.getAspects()).thenReturn(envelopedAspectMap); + when(entityClientMock.getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + )).thenReturn(entityResponseMock); + + var result = groupMembershipFieldResolverProvider.getFieldResolver(RESOURCE_SPEC); + + assertEquals(Set.of(CORPGROUP_URN, NATIVE_CORPGROUP_URN), result.getFieldValuesFuture().join().getValues()); + verify(entityClientMock, times(1)).getV2( + eq(DATASET_ENTITY_NAME), + any(Urn.class), + eq(ImmutableSet.of(GROUP_MEMBERSHIP_ASPECT_NAME, NATIVE_GROUP_MEMBERSHIP_ASPECT_NAME)), + eq(systemAuthenticationMock) + ); + } +} \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index bf50a0c7b6473..b90257870a8b2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -2,12 +2,12 @@ import com.datahub.authorization.AuthorizerChain; import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.DefaultResourceSpecResolver; +import com.datahub.authorization.DefaultEntitySpecResolver; import 
com.datahub.plugins.PluginConstant;
 import com.datahub.authentication.Authentication;
 import com.datahub.plugins.auth.authorization.Authorizer;
 import com.datahub.authorization.AuthorizerContext;
-import com.datahub.authorization.ResourceSpecResolver;
+import com.datahub.authorization.EntitySpecResolver;
 import com.datahub.plugins.common.PluginConfig;
 import com.datahub.plugins.common.PluginPermissionManager;
 import com.datahub.plugins.common.PluginType;
@@ -64,7 +64,7 @@ public class AuthorizerChainFactory {
   @Scope("singleton")
   @Nonnull
   protected AuthorizerChain getInstance() {
-    final ResourceSpecResolver resolver = initResolver();
+    final EntitySpecResolver resolver = initResolver();
 
     // Extract + initialize customer authorizers from application configs.
     final List<Authorizer> authorizers = new ArrayList<>(initCustomAuthorizers(resolver));
@@ -79,11 +79,11 @@ protected AuthorizerChain getInstance() {
     return new AuthorizerChain(authorizers, dataHubAuthorizer);
   }
 
-  private ResourceSpecResolver initResolver() {
-    return new DefaultResourceSpecResolver(systemAuthentication, entityClient);
+  private EntitySpecResolver initResolver() {
+    return new DefaultEntitySpecResolver(systemAuthentication, entityClient);
   }
 
-  private List<Authorizer> initCustomAuthorizers(ResourceSpecResolver resolver) {
+  private List<Authorizer> initCustomAuthorizers(EntitySpecResolver resolver) {
     final List<Authorizer> customAuthorizers = new ArrayList<>();
 
     Path pluginBaseDirectory = Paths.get(configurationProvider.getDatahub().getPlugin().getAuth().getPath());
@@ -99,7 +99,7 @@ private List<Authorizer> initCustomAuthorizers(ResourceSpecResolver resolver) {
     return customAuthorizers;
   }
 
-  private void registerAuthorizer(List<Authorizer> customAuthorizers, ResourceSpecResolver resolver, Config config) {
+  private void registerAuthorizer(List<Authorizer> customAuthorizers, EntitySpecResolver resolver, Config config) {
     PluginConfigFactory authorizerPluginPluginConfigFactory = new PluginConfigFactory(config);
     // Load only Authorizer configuration from plugin config factory
     List authorizers =
diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
index ade49c876f168..207c2284e2673 100644
--- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
+++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java
@@ -45,8 +45,7 @@ import io.datahubproject.openapi.util.OpenApiEntitiesUtil;
 import com.datahub.authorization.ConjunctivePrivilegeGroup;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
-import com.linkedin.metadata.models.EntitySpec;
-import com.datahub.authorization.ResourceSpec;
+import com.datahub.authorization.EntitySpec;
 import com.linkedin.metadata.authorization.PoliciesConfig;
 import com.google.common.collect.ImmutableList;
 import com.datahub.authorization.AuthUtil;
@@ -377,7 +376,7 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List sort, @Valid SortOrder sortOrder, @Valid String query) {
     Authentication authentication = AuthenticationContext.getAuthentication();
-    EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz);
+    com.linkedin.metadata.models.EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz);
     checkScrollAuthorized(authentication, entitySpec);
 
     // TODO multi-field sort
@@ -410,12 +409,12 @@ public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List
-    List<Optional<ResourceSpec>> resourceSpecs = List.of(Optional.of(new ResourceSpec(entitySpec.getName(), "")));
+    List<Optional<EntitySpec>> resourceSpecs = List.of(Optional.of(new EntitySpec(entitySpec.getName(), "")));
     if (_restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizationChain, actorUrnStr, resourceSpecs, orGroup)) {
       throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities.");
     }
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java
index 6439e2f31f7b0..898f768cf999a 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java
@@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain;
 import com.datahub.authorization.ConjunctivePrivilegeGroup;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
-import com.datahub.authorization.ResourceSpec;
+import com.datahub.authorization.EntitySpec;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.Urn;
@@ -93,8 +93,8 @@ public ResponseEntity getEntities(
             ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE.getType())
         )));
 
-    List<Optional<ResourceSpec>> resourceSpecs = entityUrns.stream()
-        .map(urn -> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString())))
+    List<Optional<EntitySpec>> resourceSpecs = entityUrns.stream()
+        .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString())))
         .collect(Collectors.toList());
     if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) {
       throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities.");
@@ -175,8 +175,8 @@ public ResponseEntity> deleteEntities(
         .map(URLDecoder::decode)
         .map(UrnUtils::getUrn).collect(Collectors.toSet());
 
-    List<Optional<ResourceSpec>> resourceSpecs = entityUrns.stream()
-        .map(urn -> Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString())))
+    List<Optional<EntitySpec>> resourceSpecs = entityUrns.stream()
+        .map(urn -> Optional.of(new EntitySpec(urn.getEntityType(), urn.toString())))
         .collect(Collectors.toList());
     if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) {
       UnauthorizedException unauthorizedException = new UnauthorizedException(actorUrnStr + " is unauthorized to delete entities.");
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java
index 1e37170f37b3b..4641fed3a8610 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/relationships/RelationshipsController.java
@@ -8,7 +8,7 @@ import com.datahub.authorization.AuthorizerChain;
 import com.datahub.authorization.ConjunctivePrivilegeGroup;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
-import com.datahub.authorization.ResourceSpec;
+import com.datahub.authorization.EntitySpec;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.common.urn.UrnUtils;
@@ -131,8 +131,8 @@ public ResponseEntity getRelationships(
             // Re-using GET_ENTITY_PRIVILEGE here as it doesn't make sense to split the privileges between these APIs.
         )));
 
-    List<Optional<ResourceSpec>> resourceSpecs =
-        Collections.singletonList(Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString())));
+    List<Optional<EntitySpec>> resourceSpecs =
+        Collections.singletonList(Optional.of(new EntitySpec(entityUrn.getEntityType(), entityUrn.toString())));
     if (restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizerChain, actorUrnStr, resourceSpecs, orGroup)) {
       throw new UnauthorizedException(actorUrnStr + " is unauthorized to get relationships.");
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java
index 5a0ce2e314e1b..fbde9e8072002 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/timeline/TimelineController.java
@@ -6,7 +6,7 @@ import com.datahub.authorization.AuthorizerChain;
 import com.datahub.authorization.ConjunctivePrivilegeGroup;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
-import com.datahub.authorization.ResourceSpec;
+import com.datahub.authorization.EntitySpec;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.Urn;
@@ -67,7 +67,7 @@ public ResponseEntity> getTimeline(
     Urn urn = Urn.createFromString(rawUrn);
     Authentication authentication = AuthenticationContext.getAuthentication();
     String actorUrnStr = authentication.getActor().toUrnStr();
-    ResourceSpec resourceSpec = new ResourceSpec(urn.getEntityType(), rawUrn);
+    EntitySpec resourceSpec = new EntitySpec(urn.getEntityType(), rawUrn);
     DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup(
         ImmutableList.of(new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.GET_TIMELINE_PRIVILEGE.getType()))));
     if (restApiAuthorizationEnabled && !AuthUtil.isAuthorized(_authorizerChain, actorUrnStr, Optional.of(resourceSpec), orGroup)) {
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
index 2b3e84e2df20f..21dc5a4c8a0d6 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java
@@ -5,7 +5,7 @@ import com.datahub.authorization.AuthUtil;
 import com.datahub.plugins.auth.authorization.Authorizer;
 import com.datahub.authorization.DisjunctivePrivilegeGroup;
-import com.datahub.authorization.ResourceSpec;
+import com.datahub.authorization.EntitySpec;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -27,7 +27,6 @@ import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl;
 import com.linkedin.metadata.entity.transactions.AspectsBatch;
 import com.linkedin.metadata.entity.validation.ValidationException;
-import com.linkedin.metadata.models.EntitySpec;
 import
com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -378,11 +377,11 @@ public static GenericAspect convertGenericAspect(@Nonnull io.datahubproject.open public static boolean authorizeProposals(List proposals, EntityService entityService, Authorizer authorizer, String actorUrnStr, DisjunctivePrivilegeGroup orGroup) { - List> resourceSpecs = proposals.stream() + List> resourceSpecs = proposals.stream() .map(proposal -> { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(proposal.getEntityType()); Urn entityUrn = EntityKeyUtils.getUrnFromProposal(proposal, entitySpec.getKeyAspectSpec()); - return Optional.of(new ResourceSpec(proposal.getEntityType(), entityUrn.toString())); + return Optional.of(new EntitySpec(proposal.getEntityType(), entityUrn.toString())); }) .collect(Collectors.toList()); return AuthUtil.isAuthorizedForResources(authorizer, actorUrnStr, resourceSpecs, orGroup); @@ -513,7 +512,7 @@ public static RollbackRunResultDto mapRollbackRunResult(RollbackRunResult rollba } public static UpsertAspectRequest createStatusRemoval(Urn urn, EntityService entityService) { - EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = entityService.getEntityRegistry().getEntitySpec(urn.getEntityType()); if (entitySpec == null || !entitySpec.getAspectSpecMap().containsKey(STATUS_ASPECT_NAME)) { throw new IllegalArgumentException("Entity type is not valid for soft deletes: " + urn.getEntityType()); } diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java index b6bc282f10b65..442ac1b0d287b 100644 --- a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizedActors; import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.PluginConstant; import com.datahub.plugins.auth.authorization.Authorizer; import java.io.BufferedReader; @@ -74,7 +74,7 @@ public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) { } @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { return new AuthorizedActors("ALL", null, null, true, true); } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 936c8bb67e645..af76af90ce77f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -3,7 +3,7 @@ import 
com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -20,7 +20,6 @@ import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.restli.RestliUtil; @@ -123,7 +122,7 @@ public Task get(@Nonnull String urnStr, @QueryParam("aspect") @Option Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get aspect for " + urn); } final VersionedAspect aspect = _entityService.getVersionedAspect(urn, aspectName, version); @@ -154,7 +153,7 @@ public Task getTimeseriesAspectValues( Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_TIMESERIES_ASPECT_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get timeseries aspect for " + urn); } GetTimeseriesAspectValuesResponse response = new GetTimeseriesAspectValuesResponse(); @@ -193,11 +192,11 @@ public Task ingestProposal( } Authentication authentication = AuthenticationContext.getAuthentication(); - EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); + com.linkedin.metadata.models.EntitySpec entitySpec = _entityService.getEntityRegistry().getEntitySpec(metadataChangeProposal.getEntityType()); Urn urn = EntityKeyUtils.getUrnFromProposal(metadataChangeProposal, entitySpec.getKeyAspectSpec()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to modify entity " + urn); } String actorUrnStr = authentication.getActor().toUrnStr(); @@ -249,7 +248,7 @@ public Task getCount(@ActionParam(PARAM_ASPECT) @Nonnull String aspectN Authentication authentication = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), - (ResourceSpec) null)) { + (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to 
get aspect counts."); } return _entityService.getCountAspect(aspectName, urnLike); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 3ff22fb767676..9bab846d1bdcc 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -123,9 +123,9 @@ public Task rollback(@ActionParam("runId") @Nonnull String run List aspectRowsToDelete; aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete, 0, ESUtils.MAX_RESULT_SIZE); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index f6dedfb9a07c6..3ee98b3244718 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -3,7 +3,7 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -173,7 +173,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -198,8 +198,8 @@ public Task> batchGet(@Nonnull Set urnStrs, for (final String urnStr : urnStrs) { urns.add(Urn.createFromString(urnStr)); } - List> resourceSpecs = urns.stream() - 
.map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -242,7 +242,7 @@ public Task ingest(@ActionParam(PARAM_ENTITY) @Nonnull Entity entity, final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entity " + urn); } @@ -273,10 +273,10 @@ public Task batchIngest(@ActionParam(PARAM_ENTITIES) @Nonnull Entity[] ent Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - List> resourceSpecs = Arrays.stream(entities) + List> resourceSpecs = Arrays.stream(entities) .map(Entity::getValue) .map(com.datahub.util.ModelUtils::getUrnFromSnapshotUnion) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(authentication, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), resourceSpecs)) { @@ -322,7 +322,7 @@ public Task search(@ActionParam(PARAM_ENTITY) @Nonnull String enti @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -347,7 +347,7 @@ public Task searchAcrossEntities(@ActionParam(PARAM_ENTITIES) @Opt @ActionParam(PARAM_COUNT) int count, @ActionParam(PARAM_SEARCH_FLAGS) @Optional SearchFlags searchFlags) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -391,7 +391,7 @@ public Task searchAcrossLineage(@ActionParam(PARAM_URN) @No @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) throws URISyntaxException { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && 
!isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -443,7 +443,7 @@ public Task list(@ActionParam(PARAM_ENTITY) @Nonnull String entityNa Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -462,7 +462,7 @@ public Task autocomplete(@ActionParam(PARAM_ENTITY) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -479,7 +479,7 @@ public Task browse(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -497,7 +497,7 @@ public Task getBrowsePaths( Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity: " + urn); } @@ -546,9 +546,9 @@ public Task deleteEntities(@ActionParam("registryId") @Optiona log.info("found {} rows to delete...", stringifyRowCount(aspectRowsToDelete.size())); response.setAspectsAffected(aspectRowsToDelete.size()); Set urns = aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet(); - List> resourceSpecs = urns.stream() + List> resourceSpecs = urns.stream() .map(UrnUtils::getUrn) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) @@ -590,7 +590,7 @@ public Task deleteEntity(@ActionParam(PARAM_URN) @Nonnull Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new 
ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity: " + urnStr); } @@ -638,7 +638,7 @@ private Long deleteTimeseriesAspects(@Nonnull Urn urn, @Nullable Long startTimeM Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urn.toString()))) { + new EntitySpec(urn.getEntityType(), urn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urn); } @@ -678,7 +678,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity " + urnStr); } @@ -695,7 +695,7 @@ public Task deleteReferencesTo(@ActionParam(PARAM_URN) public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonnull Boolean value) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SET_WRITEABLE_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to enable and disable write mode."); } @@ -712,7 +712,7 @@ public Task setWriteable(@ActionParam(PARAM_VALUE) @Optional("true") @Nonn public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String entityName) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -725,7 +725,7 @@ public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String public Task batchGetTotalEntityCount(@ActionParam(PARAM_ENTITIES) @Nonnull String[] entityNames) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_COUNTS_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity counts."); } @@ -739,7 +739,7 @@ public Task listUrns(@ActionParam(PARAM_ENTITY) @Nonnull String @ActionParam(PARAM_START) int start, @ActionParam(PARAM_COUNT) 
int count) throws URISyntaxException { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -757,10 +757,10 @@ public Task applyRetention(@ActionParam(PARAM_START) @Optional @Nullable @ActionParam(PARAM_URN) @Optional @Nullable String urn ) { Authentication auth = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.APPLY_RETENTION_PRIVILEGE), resourceSpec)) { @@ -781,7 +781,7 @@ public Task filter(@ActionParam(PARAM_ENTITY) @Nonnull String enti Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.SEARCH_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to search."); } @@ -799,7 +799,7 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr) thro Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - new ResourceSpec(urn.getEntityType(), urnStr))) { + new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized get entity: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 7efb93c0f50e6..0c3e93273b863 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; @@ -68,7 +68,7 @@ public Task get(@Nonnull String urnStr, final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, 
ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new ResourceSpec(urn.getEntityType(), urnStr))) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), new EntitySpec(urn.getEntityType(), urnStr))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity " + urn); } @@ -96,8 +96,8 @@ public Task> batchGet(@Nonnull Set urnStrs, urns.add(Urn.createFromString(urnStr)); } Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = urns.stream() - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + List> resourceSpecs = urns.stream() + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index fd5c3507b5408..05b7e6b3ff24b 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; @@ -65,9 +65,9 @@ public Task> batchGetVersioned( @QueryParam(PARAM_ENTITY_TYPE) @Nonnull String entityType, @QueryParam(PARAM_ASPECTS) @Optional @Nullable String[] aspectNames) { Authentication auth = AuthenticationContext.getAuthentication(); - List> resourceSpecs = versionedUrnStrs.stream() + List> resourceSpecs = versionedUrnStrs.stream() .map(versionedUrn -> UrnUtils.getUrn(versionedUrn.getUrn())) - .map(urn -> java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))) + .map(urn -> java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))) .collect(Collectors.toList()); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), resourceSpecs)) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index 313d16333f9e9..4a8e74c89039a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import 
com.google.common.collect.ImmutableList; import com.linkedin.common.EntityRelationship; import com.linkedin.common.EntityRelationshipArray; @@ -107,7 +107,7 @@ public Task get(@QueryParam("urn") @Nonnull String rawUrn, Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + rawUrn); } @@ -142,7 +142,7 @@ public UpdateResponse delete(@QueryParam("urn") @Nonnull String rawUrn) throws E Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.DELETE_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to delete entity: " + rawUrn); } @@ -162,7 +162,7 @@ public Task getLineage(@ActionParam(PARAM_URN) @Nonnull Str Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE), - Collections.singletonList(java.util.Optional.of(new ResourceSpec(urn.getEntityType(), urn.toString()))))) { + Collections.singletonList(java.util.Optional.of(new EntitySpec(urn.getEntityType(), urn.toString()))))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to get entity lineage: " + urnStr); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 188e5ae18ee8f..12586b66495a9 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -37,10 +37,10 @@ public static String restoreIndices( @Nonnull EntityService entityService ) { Authentication authentication = AuthenticationContext.getAuthentication(); - ResourceSpec resourceSpec = null; + EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { Urn resource = UrnUtils.getUrn(urn); - resourceSpec = new ResourceSpec(resource.getEntityType(), resource.toString()); + resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && 
!isAuthorized(authentication, authorizer, ImmutableList.of(PoliciesConfig.RESTORE_INDICES_PRIVILEGE), diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index f36841bb4abae..a8018074497c4 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -3,7 +3,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableList; import com.linkedin.entity.Entity; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -54,7 +54,7 @@ public Task producePlatformEvent( @ActionParam("event") @Nonnull PlatformEvent event) { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to produce platform events."); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 5c3b90a84aec1..9949556c99b81 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -4,7 +4,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -82,13 +82,13 @@ public static RestLiServiceException invalidArgumentsException(@Nullable String } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nonnull final List> resources) { + @Nonnull final List privileges, @Nonnull final List> resources) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorizedForResources(authorizer, authentication.getActor().toUrnStr(), resources, orGroup); } public static boolean isAuthorized(@Nonnull Authentication authentication, @Nonnull Authorizer authorizer, - @Nonnull final List privileges, @Nullable final ResourceSpec resource) { + @Nonnull final List privileges, @Nullable final EntitySpec resource) { DisjunctivePrivilegeGroup orGroup = convertPrivilegeGroup(privileges); return AuthUtil.isAuthorized(authorizer, authentication.getActor().toUrnStr(), 
java.util.Optional.ofNullable(resource), orGroup); } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index be70cf9c494ef..02d413301f3b4 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -4,7 +4,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; -import com.datahub.authorization.ResourceSpec; +import com.datahub.authorization.EntitySpec; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; @@ -125,7 +125,7 @@ public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregat return RestliUtil.toTask(() -> { Authentication auth = AuthenticationContext.getAuthentication(); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) - && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (ResourceSpec) null)) { + && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PRIVILEGE), (EntitySpec) null)) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to edit entities."); } @@ -323,7 +323,7 @@ public Task query(@ActionParam(PARAM_RESOURCE) @Nonnull String Urn resourceUrn = UrnUtils.getUrn(resource); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } @@ -383,7 +383,7 @@ public Task queryRange(@ActionParam(PARAM_RESOURCE) @Nonnull S Urn resourceUrn = UrnUtils.getUrn(resource); if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV)) && !isAuthorized(auth, _authorizer, ImmutableList.of(PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE), - new ResourceSpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException(HttpStatus.S_401_UNAUTHORIZED, "User is unauthorized to query usage."); } From d04d25bf428aa442b08a4011fcac81b3d1526a86 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:50:20 -0400 Subject: [PATCH 125/156] smoke test(): Query plus filter search test (#8993) --- .../e2e/search/query_and_filter_search.js | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js diff --git a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js new file mode 100644 index 0000000000000..4637310b86496 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js @@ -0,0 +1,57 @@ +describe("auto-complete dropdown, filter plus query search test", () => { + 
+ const platformQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-urn:li:dataPlatform:${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter_platform___false___EQUAL___0=urn%3Ali%3AdataPlatform%3A${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + const entityQuerySearch = (query,test_id,active_filter) => { + cy.visit("/"); + cy.get("input[data-testid=search-input]").type(query); + cy.get(`[data-testid="quick-filter-${test_id}"]`).click(); + cy.focused().type("{enter}").wait(3000); + cy.url().should( + "include", + `?filter__entityType___false___EQUAL___0=${test_id}` + ); + cy.get('[data-testid="search-input"]').should("have.value", query); + cy.get(`[data-testid="active-filter-${active_filter}"]`).should("be.visible"); + cy.contains("of 0 results").should("not.exist"); + cy.contains(/of [0-9]+ results/); + } + + it("verify the 'filter by' section + query (result in search page with query applied + filter applied)", () => { + // Platform query plus filter test + cy.loginWithCredentials(); + // Airflow + platformQuerySearch ("cypress","airflow","Airflow"); + // BigQuery + platformQuerySearch ("cypress","bigquery","BigQuery"); + // dbt + platformQuerySearch ("cypress","dbt","dbt"); + // Hive + platformQuerySearch ("cypress","hive","Hive"); + + // Entity type query plus filter test + // Datasets + entityQuerySearch ("cypress","DATASET","Datasets"); + // Dashboards + entityQuerySearch ("cypress","DASHBOARD","Dashboards"); + // Pipelines + entityQuerySearch ("cypress","DATA_FLOW","Pipelines"); + // Domains + entityQuerySearch ("Marketing","DOMAIN","Domains"); + // Glossary Terms + entityQuerySearch ("cypress","GLOSSARY_TERM","Glossary Terms"); + }); +}); \ No newline at end of file From a8f0080c08b5c816f0dae9d3bef07ea00220541e Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 00:14:45 +0200 Subject: [PATCH 126/156] feat(ingest/teradata): Teradata source (#8977) --- .../docs/sources/teradata/teradata_pre.md | 28 +++ .../docs/sources/teradata/teradata_recipe.yml | 17 ++ metadata-ingestion/setup.py | 3 + .../datahub/ingestion/source/sql/teradata.py | 228 ++++++++++++++++++ .../testing/check_sql_parser_result.py | 5 +- .../src/datahub/utilities/sqlglot_lineage.py | 5 + .../test_teradata_default_normalization.json | 38 +++ .../unit/sql_parsing/test_sqlglot_lineage.py | 42 ++++ 8 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/docs/sources/teradata/teradata_pre.md create mode 100644 metadata-ingestion/docs/sources/teradata/teradata_recipe.yml create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md new file mode 100644 index 0000000000000..eb59caa29eb52 --- /dev/null +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -0,0 +1,28 @@ +### Prerequisites +1. Create a user which has access to the database you want to ingest. + ```sql + CREATE USER datahub FROM AS PASSWORD = PERM = 20000000; + ``` +2. 
Grant the user the following privileges:
+   ```sql
+   GRANT SELECT ON dbc.columns TO datahub;
+   GRANT SELECT ON dbc.databases TO datahub;
+   GRANT SELECT ON dbc.tables TO datahub;
+   GRANT SELECT ON DBC.All_RI_ChildrenV TO datahub;
+   GRANT SELECT ON DBC.ColumnsV TO datahub;
+   GRANT SELECT ON DBC.IndicesV TO datahub;
+   GRANT SELECT ON dbc.TableTextV TO datahub;
+   GRANT SELECT ON dbc.TablesV TO datahub;
+   GRANT SELECT ON dbc.dbqlogtbl TO datahub; -- if lineage or usage extraction is enabled
+   ```
+
+   If you want to run profiling, you need to grant SELECT permission on all the tables you want to profile.
+
+3. If lineage or usage extraction is enabled, please check that query logging is enabled and that it is set to a size
+which will fit your queries (the default query text size Teradata captures is max 200 chars).
+   An example of how you can set it for all users:
+   ```sql
+   REPLACE QUERY LOGGING LIMIT SQLTEXT=2000 ON ALL;
+   ```
+   See more about query logging here:
+   https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs
diff --git a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml
new file mode 100644
index 0000000000000..8cf07ba4c3a01
--- /dev/null
+++ b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml
@@ -0,0 +1,17 @@
+pipeline_name: my-teradata-ingestion-pipeline
+source:
+  type: teradata
+  config:
+    host_port: "myteradatainstance.teradata.com:1025"
+    #platform_instance: "myteradatainstance"
+    username: myuser
+    password: mypassword
+    #database_pattern:
+    #  allow:
+    #    - "demo_user"
+    #  ignoreCase: true
+    include_table_lineage: true
+    include_usage_statistics: true
+    stateful_ingestion:
+      enabled: true
+sink:
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 61e7b684682a4..3ea9a2ea61d74 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -373,6 +373,7 @@
     # FIXME: I don't think tableau uses sqllineage anymore so we should be able
     # to remove that dependency.
     "tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib,
+    "teradata": sql_common | {"teradatasqlalchemy>=17.20.0.0"},
     "trino": sql_common | trino,
     "starburst-trino-usage": sql_common | usage_common | trino,
     "nifi": {"requests", "packaging", "requests-gssapi"},
@@ -499,6 +500,7 @@
     "s3",
     "snowflake",
     "tableau",
+    "teradata",
     "trino",
     "hive",
     "starburst-trino-usage",
@@ -597,6 +599,7 @@
     "tableau = datahub.ingestion.source.tableau:TableauSource",
     "openapi = datahub.ingestion.source.openapi:OpenApiSource",
     "metabase = datahub.ingestion.source.metabase:MetabaseSource",
+    "teradata = datahub.ingestion.source.sql.teradata:TeradataSource",
     "trino = datahub.ingestion.source.sql.trino:TrinoSource",
     "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource",
     "nifi = datahub.ingestion.source.nifi:NifiSource",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
new file mode 100644
index 0000000000000..dd11cd840bed9
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -0,0 +1,228 @@
+import logging
+from dataclasses import dataclass
+from typing import Iterable, Optional, Set, Union
+
+# This import verifies that the dependencies are available.
+import teradatasqlalchemy # noqa: F401 +import teradatasqlalchemy.types as custom_types +from pydantic.fields import Field +from sqlalchemy import create_engine +from sqlalchemy.engine import Engine + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SourceCapability, + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport +from datahub.ingestion.source.sql.two_tier_sql_source import ( + TwoTierSQLAlchemyConfig, + TwoTierSQLAlchemySource, +) +from datahub.ingestion.source.usage.usage_common import BaseUsageConfig +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport +from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.metadata.com.linkedin.pegasus2avro.schema import ( + BytesTypeClass, + TimeTypeClass, +) +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage + +logger: logging.Logger = logging.getLogger(__name__) + +register_custom_type(custom_types.JSON, BytesTypeClass) +register_custom_type(custom_types.INTERVAL_DAY, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_DAY_TO_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MINUTE_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_MINUTE, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_HOUR_TO_SECOND, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_MONTH, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR, TimeTypeClass) +register_custom_type(custom_types.INTERVAL_YEAR_TO_MONTH, TimeTypeClass) +register_custom_type(custom_types.MBB, BytesTypeClass) +register_custom_type(custom_types.MBR, BytesTypeClass) +register_custom_type(custom_types.GEOMETRY, BytesTypeClass) +register_custom_type(custom_types.TDUDT, BytesTypeClass) +register_custom_type(custom_types.XML, BytesTypeClass) + + +@dataclass +class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): + num_queries_parsed: int = 0 + num_table_parse_failures: int = 0 + + +class BaseTeradataConfig(TwoTierSQLAlchemyConfig): + scheme = Field(default="teradatasql", description="database scheme") + + +class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): + database_pattern = Field( + default=AllowDenyPattern(deny=["dbc"]), + description="Regex patterns for databases to filter in ingestion.", + ) + include_table_lineage = Field( + default=False, + description="Whether to include table lineage in the ingestion. 
" + "This requires to have the table lineage feature enabled.", + ) + + usage: BaseUsageConfig = Field( + description="The usage config to use when generating usage statistics", + default=BaseUsageConfig(), + ) + + use_schema_resolver: bool = Field( + default=True, + description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. Turn off only for testing.", + hidden_from_docs=True, + ) + + default_db: Optional[str] = Field( + default=None, + description="The default database to use for unqualified table names", + ) + + include_usage_statistics: bool = Field( + default=False, + description="Generate usage statistic.", + ) + + +@platform_name("Teradata") +@config_class(TeradataConfig) +@support_status(SupportStatus.TESTING) +@capability(SourceCapability.DOMAINS, "Enabled by default") +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.DELETION_DETECTION, "Optionally enabled via configuration") +@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") +@capability(SourceCapability.USAGE_STATS, "Optionally enabled via configuration") +class TeradataSource(TwoTierSQLAlchemySource): + """ + This plugin extracts the following: + + - Metadata for databases, schemas, views, and tables + - Column types associated with each table + - Table, row, and column statistics via optional SQL profiling + """ + + config: TeradataConfig + + LINEAGE_QUERY: str = """SELECT ProcID, UserName as "user", StartTime AT TIME ZONE 'GMT' as "timestamp", DefaultDatabase as default_database, QueryText as query + FROM "DBC".DBQLogTbl + where ErrorCode = 0 + and QueryText like 'create table demo_user.test_lineage%' + and "timestamp" >= TIMESTAMP '{start_time}' + and "timestamp" < TIMESTAMP '{end_time}' + """ + urns: Optional[Set[str]] + + def __init__(self, config: TeradataConfig, ctx: PipelineContext): + super().__init__(config, ctx, "teradata") + + self.report: TeradataReport = TeradataReport() + self.graph: Optional[DataHubGraph] = ctx.graph + + if self.graph: + if self.config.use_schema_resolver: + self.schema_resolver = ( + self.graph.initialize_schema_resolver_from_datahub( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + ) + self.urns = self.schema_resolver.get_urns() + else: + self.schema_resolver = self.graph._make_schema_resolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + self.urns = None + else: + self.schema_resolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + graph=None, + env=self.config.env, + ) + self.urns = None + + self.builder: SqlParsingBuilder = SqlParsingBuilder( + usage_config=self.config.usage + if self.config.include_usage_statistics + else None, + generate_lineage=self.config.include_table_lineage, + generate_usage_statistics=self.config.include_usage_statistics, + generate_operations=self.config.usage.include_operational_stats, + ) + + @classmethod + def create(cls, config_dict, ctx): + config = TeradataConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: + engine = self.get_metadata_engine() + for entry in engine.execute( + 
self.LINEAGE_QUERY.format( + start_time=self.config.start_time, end_time=self.config.end_time + ) + ): + self.report.num_queries_parsed += 1 + if self.report.num_queries_parsed % 1000 == 0: + logger.info(f"Parsed {self.report.num_queries_parsed} queries") + + result = sqlglot_lineage( + sql=entry.query, + schema_resolver=self.schema_resolver, + default_db=None, + default_schema=entry.default_database + if entry.default_database + else self.config.default_db, + ) + if result.debug_info.table_error: + logger.debug( + f"Error parsing table lineage, {result.debug_info.table_error}" + ) + self.report.num_table_parse_failures += 1 + continue + + yield from self.builder.process_sql_parsing_result( + result, + query=entry.query, + query_timestamp=entry.timestamp, + user=f"urn:li:corpuser:{entry.user}", + include_urns=self.urns, + ) + + def get_metadata_engine(self) -> Engine: + url = self.config.get_sql_alchemy_url() + logger.debug(f"sql_alchemy_url={url}") + return create_engine(url, **self.config.options) + + def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: + yield from super().get_workunits_internal() + if self.config.include_table_lineage or self.config.include_usage_statistics: + self.report.report_ingestion_stage_start("audit log extraction") + yield from self.get_audit_log_mcps() + yield from self.builder.gen_workunits() diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index 8516a7054a9cd..b3b1331db768b 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -70,11 +70,14 @@ def assert_sql_result( sql: str, *, dialect: str, + platform_instance: Optional[str] = None, expected_file: pathlib.Path, schemas: Optional[Dict[str, SchemaInfo]] = None, **kwargs: Any, ) -> None: - schema_resolver = SchemaResolver(platform=dialect) + schema_resolver = SchemaResolver( + platform=dialect, platform_instance=platform_instance + ) if schemas: for urn, schema in schemas.items(): schema_resolver.add_raw_schema_info(urn, schema) diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 349eb40a5e865..c830ec8c02fd4 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -482,6 +482,11 @@ def _column_level_lineage( # noqa: C901 # Our snowflake source lowercases column identifiers, so we are forced # to do fuzzy (case-insensitive) resolution instead of exact resolution. "snowflake", + # Teradata column names are case-insensitive. + # A name, even when enclosed in double quotation marks, is not case sensitive. For example, CUSTOMER and Customer are the same. 
+ # See more below: + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acreldb/n0ejgx4895bofnn14rlguktfx5r3.htm + "teradata", } sqlglot_db_schema = sqlglot.MappingSchema( diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json new file mode 100644 index 0000000000000..b0351a7e07ad2 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -0,0 +1,38 @@ +{ + "query_type": "CREATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "PatientId", + "native_column_type": "INTEGER()" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)", + "column": "PatientId" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", + "column": "BMI", + "native_column_type": "FLOAT()" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)", + "column": "BMI" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index bb6e5f1581754..059add8db67e4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -630,3 +630,45 @@ def test_snowflake_column_cast(): # TODO: Add a test for setting platform_instance or env + + +def test_teradata_default_normalization(): + assert_sql_result( + """ +create table demo_user.test_lineage2 as + ( + select + ppd.PatientId, + ppf.bmi + from + demo_user.pima_patient_features ppf + join demo_user.pima_patient_diagnoses ppd on + ppd.PatientId = ppf.PatientId + ) with data; +""", + dialect="teradata", + default_schema="dbc", + platform_instance="myteradata", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_diagnoses,PROD)": { + "HasDiabetes": "INTEGER()", + "PatientId": "INTEGER()", + }, + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.pima_patient_features,PROD)": { + "Age": "INTEGER()", + "BMI": "FLOAT()", + "BloodP": "INTEGER()", + "DiPedFunc": "FLOAT()", + "NumTimesPrg": "INTEGER()", + "PatientId": "INTEGER()", + "PlGlcConc": "INTEGER()", + "SkinThick": "INTEGER()", + "TwoHourSerIns": "INTEGER()", + }, + "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)": { + "BMI": "FLOAT()", + "PatientId": "INTEGER()", + }, + }, + expected_file=RESOURCE_DIR / "test_teradata_default_normalization.json", + ) From 71c9bd3a495c1f3663d2268088f04d56dd8c37c9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 13 Oct 2023 11:48:22 +0530 Subject: [PATCH 127/156] ci(ingest): update base requirements (#8995) --- .../base-requirements.txt | 398 +++++++++--------- 1 file changed, 205 insertions(+), 193 deletions(-) 
diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 82d9a93a9a2c3..eb082d50b3020 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -2,62 +2,58 @@ # pyspark==3.0.3 # pydeequ==1.0.1 -acryl-datahub-classify==0.0.6 -acryl-iceberg-legacy==0.0.4 -acryl-PyHive==0.6.13 -aenum==3.1.12 -aiohttp==3.8.4 +acryl-datahub-classify==0.0.8 +acryl-PyHive==0.6.14 +acryl-sqlglot==18.5.2.dev45 +aenum==3.1.15 +aiohttp==3.8.6 aiosignal==1.3.1 -alembic==1.11.1 +alembic==1.12.0 altair==4.2.0 -anyio==3.7.0 -apache-airflow==2.6.1 -apache-airflow-providers-common-sql==1.5.1 -apache-airflow-providers-ftp==3.4.1 -apache-airflow-providers-http==4.4.1 -apache-airflow-providers-imap==3.2.1 -apache-airflow-providers-sqlite==3.4.1 -apispec==5.2.2 +anyio==3.7.1 +apache-airflow==2.7.2 +apache-airflow-providers-common-sql==1.7.2 +apache-airflow-providers-ftp==3.5.2 +apache-airflow-providers-http==4.5.2 +apache-airflow-providers-imap==3.3.2 +apache-airflow-providers-sqlite==3.4.3 +apispec==6.3.0 appdirs==1.4.4 appnope==0.1.3 -argcomplete==3.0.8 -argon2-cffi==21.3.0 +argcomplete==3.1.2 +argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 asgiref==3.7.2 asn1crypto==1.5.1 -asttokens==2.2.1 -async-timeout==4.0.2 +asttokens==2.4.0 +async-timeout==4.0.3 asynch==0.2.2 attrs==23.1.0 avro==1.10.2 -avro-gen3==0.7.10 -azure-core==1.26.4 -azure-identity==1.10.0 -azure-storage-blob==12.16.0 -azure-storage-file-datalake==12.11.0 -Babel==2.12.1 +avro-gen3==0.7.11 +Babel==2.13.0 backcall==0.2.0 backoff==2.2.1 beautifulsoup4==4.12.2 -bleach==6.0.0 -blinker==1.6.2 -blis==0.7.9 -boto3==1.26.142 -botocore==1.29.142 +bleach==6.1.0 +blinker==1.6.3 +blis==0.7.11 +boto3==1.28.62 +botocore==1.31.62 bowler==0.9.0 -bracex==2.3.post1 +bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 cachetools==5.3.1 -catalogue==2.0.8 -cattrs==22.2.0 -certifi==2023.5.7 -cffi==1.15.1 -chardet==5.1.0 -charset-normalizer==2.1.1 +catalogue==2.0.10 +cattrs==23.1.2 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.0 ciso8601==2.3.0 -click==8.1.3 -click-default-group==1.2.2 +click==8.1.7 +click-default-group==1.2.4 click-spinner==0.1.10 clickclick==20.10.2 clickhouse-cityhash==1.0.2.4 @@ -66,205 +62,217 @@ clickhouse-sqlalchemy==0.2.4 cloudpickle==2.2.1 colorama==0.4.6 colorlog==4.8.0 -confection==0.0.4 +comm==0.1.4 +confection==0.1.3 ConfigUpdater==3.1.1 confluent-kafka==1.8.2 connexion==2.14.2 cron-descriptor==1.4.0 -croniter==1.3.15 -cryptography==37.0.4 +croniter==2.0.1 +cryptography==41.0.4 cx-Oracle==8.3.0 -cymem==2.0.7 -dask==2023.5.1 -databricks-cli==0.17.7 +cymem==2.0.8 +dask==2023.9.3 +databricks-cli==0.18.0 databricks-dbapi==0.6.0 -databricks-sdk==0.1.8 -debugpy==1.6.7 +databricks-sdk==0.10.0 +debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.9.0 +deltalake==0.11.0 Deprecated==1.2.14 -dill==0.3.6 -dnspython==2.3.0 -docker==6.1.2 +dill==0.3.7 +dnspython==2.4.2 +docker==6.1.3 docutils==0.20.1 ecdsa==0.18.0 elasticsearch==7.13.4 email-validator==1.3.1 entrypoints==0.4 et-xmlfile==1.1.0 -exceptiongroup==1.1.1 -executing==1.2.0 -expandvars==0.9.0 -fastapi==0.95.2 -fastavro==1.7.4 -fastjsonschema==2.17.1 -feast==0.29.0 -filelock==3.12.0 +exceptiongroup==1.1.3 +executing==2.0.0 +expandvars==0.11.0 +fastapi==0.103.2 +fastavro==1.8.4 +fastjsonschema==2.18.1 +feast==0.31.1 +filelock==3.12.4 fissix==21.11.13 Flask==2.2.5 flatdict==4.0.1 -frozenlist==1.3.3 -fsspec==2023.5.0 +frozenlist==1.4.0 
+fsspec==2023.9.2 future==0.18.3 -GeoAlchemy2==0.13.3 +GeoAlchemy2==0.14.1 gitdb==4.0.10 -GitPython==3.1.31 -google-api-core==2.11.0 -google-auth==2.19.0 -google-cloud-appengine-logging==1.3.0 +GitPython==3.1.37 +google-api-core==2.12.0 +google-auth==2.23.3 +google-cloud-appengine-logging==1.3.2 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.10.0 -google-cloud-bigquery-storage==2.19.1 -google-cloud-core==2.3.2 +google-cloud-bigquery==3.12.0 +google-cloud-core==2.3.3 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 -google-resumable-media==2.5.0 -googleapis-common-protos==1.59.0 +google-re2==1.1 +google-resumable-media==2.6.0 +googleapis-common-protos==1.60.0 gql==3.4.1 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 -greenlet==2.0.2 +greenlet==3.0.0 grpc-google-iam-v1==0.12.6 -grpcio==1.54.2 -grpcio-reflection==1.54.2 -grpcio-status==1.54.2 -grpcio-tools==1.54.2 -gssapi==1.8.2 -gunicorn==20.1.0 +grpcio==1.59.0 +grpcio-reflection==1.59.0 +grpcio-status==1.59.0 +grpcio-tools==1.59.0 +gssapi==1.8.3 +gunicorn==21.2.0 h11==0.14.0 -hmsclient==0.1.1 -httpcore==0.17.2 -httptools==0.5.0 -httpx==0.24.1 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 humanfriendly==10.0 idna==3.4 -ijson==3.2.0.post0 -importlib-metadata==6.6.0 -importlib-resources==5.12.0 +ijson==3.2.3 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 -ipython==8.13.2 +ipython==8.16.1 ipython-genutils==0.2.0 -ipywidgets==8.0.6 +ipywidgets==8.1.1 iso3166==2.1.1 isodate==0.6.1 itsdangerous==2.1.2 -jedi==0.18.2 +jedi==0.19.1 Jinja2==3.1.2 jmespath==1.0.1 JPype1==1.4.1 -jsonlines==3.1.0 -jsonpatch==1.32 -jsonpointer==2.3 +jsonlines==4.0.0 +jsonpatch==1.33 +jsonpointer==2.4 jsonref==1.1.0 -jsonschema==4.17.3 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 jupyter-server==1.24.0 jupyter_client==7.4.9 jupyter_core==4.12.0 jupyterlab-pygments==0.2.2 -jupyterlab-widgets==3.0.7 +jupyterlab-widgets==3.0.9 langcodes==3.3.0 lark==1.1.4 lazy-object-proxy==1.9.0 leb128==1.0.5 -limits==3.5.0 +limits==3.6.0 linear-tsv==1.1.0 linkify-it-py==2.0.2 lkml==1.3.1 locket==1.0.0 lockfile==0.12.2 looker-sdk==23.0.0 -lxml==4.9.2 +lxml==4.9.3 lz4==4.3.2 makefun==1.15.1 Mako==1.2.4 -Markdown==3.4.3 -markdown-it-py==2.2.0 -MarkupSafe==2.1.2 -marshmallow==3.19.0 -marshmallow-enum==1.5.1 +Markdown==3.5 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 marshmallow-oneofschema==3.0.1 marshmallow-sqlalchemy==0.26.1 matplotlib-inline==0.1.6 -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.0 mdurl==0.1.2 -mistune==2.0.5 +mistune==3.0.2 mixpanel==4.10.0 -mmh3==4.0.0 -more-itertools==9.1.0 +mlflow-skinny==2.7.1 +mmh3==4.0.1 +mmhash3==3.0.1 +more-itertools==10.1.0 moreorless==0.4.0 -moto==4.1.10 -msal==1.16.0 -msal-extensions==1.0.0 +moto==4.2.5 +msal==1.22.0 multidict==6.0.4 -murmurhash==1.0.9 -mypy==1.3.0 +murmurhash==1.0.10 +mypy==1.6.0 mypy-extensions==1.0.0 nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.4.0 -nbformat==5.8.0 -nest-asyncio==1.5.6 +nbconvert==7.9.2 +nbformat==5.9.1 +nest-asyncio==1.5.8 networkx==3.1 -notebook==6.5.4 +notebook==6.5.6 notebook_shim==0.2.3 -numpy==1.24.3 +numpy==1.26.0 oauthlib==3.2.2 okta==1.7.0 +openlineage-airflow==1.2.0 +openlineage-integration-common==1.2.0 +openlineage-python==1.2.0 +openlineage_sql==1.2.0 openpyxl==3.1.2 +opentelemetry-api==1.20.0 +opentelemetry-exporter-otlp==1.20.0 +opentelemetry-exporter-otlp-proto-common==1.20.0 +opentelemetry-exporter-otlp-proto-grpc==1.20.0 
+opentelemetry-exporter-otlp-proto-http==1.20.0 +opentelemetry-proto==1.20.0 +opentelemetry-sdk==1.20.0 +opentelemetry-semantic-conventions==0.41b0 ordered-set==4.1.0 oscrypto==1.3.0 -packaging==23.1 +packaging==23.2 pandas==1.5.3 pandavro==1.5.2 pandocfilters==1.5.0 -parse==1.19.0 +parse==1.19.1 parso==0.8.3 -partd==1.4.0 -pathspec==0.9.0 -pathy==0.10.1 +partd==1.4.1 +pathspec==0.11.2 +pathy==0.10.2 pendulum==2.1.2 pexpect==4.8.0 phonenumbers==8.13.0 pickleshare==0.7.5 -platformdirs==3.5.1 -pluggy==1.0.0 -portalocker==2.7.0 -preshed==3.0.8 +platformdirs==3.11.0 +pluggy==1.3.0 +preshed==3.0.9 prison==0.2.1 progressbar2==4.2.0 -prometheus-client==0.17.0 -prompt-toolkit==3.0.38 -proto-plus==1.22.2 -protobuf==4.23.2 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +proto-plus==1.22.3 +protobuf==4.24.4 psutil==5.9.5 -psycopg2-binary==2.9.6 +psycopg2-binary==2.9.9 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 -py-partiql-parser==0.3.0 -pyarrow==8.0.0 +py-partiql-parser==0.3.7 +pyarrow==11.0.0 pyasn1==0.5.0 pyasn1-modules==0.3.0 pyathena==2.4.1 pycountry==22.3.5 pycparser==2.21 -pycryptodome==3.18.0 -pycryptodomex==3.18.0 -pydantic==1.10.8 -pydash==7.0.3 +pycryptodome==3.19.0 +pycryptodomex==3.19.0 +pydantic==1.10.13 +pydash==7.0.6 pydruid==0.6.5 -Pygments==2.15.1 -pymongo==4.3.3 -PyMySQL==1.0.3 -pyOpenSSL==22.0.0 +Pygments==2.16.1 +pyiceberg==0.4.0 +pymongo==4.5.0 +PyMySQL==1.1.0 +pyOpenSSL==23.2.0 pyparsing==3.0.9 -pyrsistent==0.19.3 -pyspnego==0.9.0 +pyspnego==0.10.2 python-daemon==3.0.1 python-dateutil==2.8.2 python-dotenv==1.0.0 @@ -272,111 +280,115 @@ python-jose==3.3.0 python-ldap==3.4.3 python-nvd3==0.15.0 python-slugify==8.0.1 -python-stdnum==1.18 -python-tds==1.12.0 -python-utils==3.6.0 +python-stdnum==1.19 +python-tds==1.13.0 +python-utils==3.8.1 python3-openid==3.2.0 -pytz==2023.3 +pytz==2023.3.post1 pytzdata==2020.1 -PyYAML==6.0 -pyzmq==25.1.0 +PyYAML==6.0.1 +pyzmq==24.0.1 ratelimiter==1.2.0.post0 redash-toolbelt==0.1.9 -redshift-connector==2.0.910 -regex==2023.5.5 -requests==2.28.2 +redshift-connector==2.0.914 +referencing==0.30.2 +regex==2023.10.3 +requests==2.31.0 requests-file==1.5.1 requests-gssapi==1.2.3 requests-ntlm==1.2.0 requests-toolbelt==0.10.1 -responses==0.23.1 -retrying==1.3.4 +responses==0.23.3 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich==13.3.5 -rich_argparse==1.1.0 +rich==13.6.0 +rich-argparse==1.3.0 +rpds-py==0.10.6 rsa==4.9 ruamel.yaml==0.17.17 -s3transfer==0.6.1 -sasl3==0.2.11 -schwifty==2023.3.0 -scipy==1.10.1 +ruamel.yaml.clib==0.2.8 +s3transfer==0.7.0 +schwifty==2023.9.0 +scipy==1.11.3 scramp==1.4.4 Send2Trash==1.8.2 -setproctitle==1.3.2 -simple-salesforce==1.12.4 +sentry-sdk==1.32.0 +setproctitle==1.3.3 +simple-salesforce==1.12.5 six==1.16.0 -smart-open==6.3.0 -smmap==5.0.0 +smart-open==6.4.0 +smmap==5.0.1 sniffio==1.3.0 -snowflake-connector-python==2.9.0 -snowflake-sqlalchemy==1.4.7 -soupsieve==2.4.1 +snowflake-connector-python==3.2.1 +snowflake-sqlalchemy==1.5.0 +sortedcontainers==2.4.0 +soupsieve==2.5 spacy==3.4.3 spacy-legacy==3.0.12 -spacy-loggers==1.0.4 +spacy-loggers==1.0.5 sql-metadata==2.2.2 -SQLAlchemy==1.4.41 -sqlalchemy-bigquery==1.6.1 +SQLAlchemy==1.4.44 +sqlalchemy-bigquery==1.8.0 SQLAlchemy-JSONField==1.0.1.post0 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.1 -sqlalchemy2-stubs==0.0.2a34 -sqllineage==1.3.6 -sqlparse==0.4.3 -srsly==2.4.6 -stack-data==0.6.2 +sqlalchemy2-stubs==0.0.2a35 +sqllineage==1.3.8 +sqlparse==0.4.4 +srsly==2.4.8 +stack-data==0.6.3 starlette==0.27.0 +strictyaml==1.7.3 
tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 -tenacity==8.2.2 +tenacity==8.2.3 termcolor==2.3.0 terminado==0.17.1 text-unidecode==1.3 -thinc==8.1.10 -thrift==0.16.0 +thinc==8.1.12 +thrift==0.13.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 tomli==2.0.1 +tomlkit==0.12.1 toolz==0.12.0 -tornado==6.3.2 -tqdm==4.65.0 +tornado==6.3.3 +tqdm==4.66.1 traitlets==5.2.1.post0 -trino==0.324.0 +trino==0.327.0 typeguard==2.13.3 typer==0.7.0 -types-PyYAML==6.0.12.10 +types-PyYAML==6.0.12.12 typing-inspect==0.9.0 -typing_extensions==4.5.0 -tzlocal==5.0.1 +typing_extensions==4.8.0 +tzlocal==5.1 uc-micro-py==1.0.2 -ujson==5.7.0 +ujson==5.8.0 unicodecsv==0.14.1 -urllib3==1.26.16 -uvicorn==0.22.0 +urllib3==1.26.17 +uvicorn==0.23.2 uvloop==0.17.0 -vertica-python==1.3.2 -vertica-sqlalchemy-dialect==0.0.1 +vertica-python==1.3.5 +vertica-sqlalchemy-dialect==0.0.8 vininfo==1.7.0 volatile==2.1.0 wasabi==0.10.1 -watchfiles==0.19.0 -wcmatch==8.4.1 -wcwidth==0.2.6 +watchfiles==0.20.0 +wcmatch==8.5 +wcwidth==0.2.8 webencodings==0.5.1 -websocket-client==1.5.2 +websocket-client==1.6.4 websockets==11.0.3 Werkzeug==2.2.3 -widgetsnbextension==4.0.7 +widgetsnbextension==4.0.9 wrapt==1.15.0 -WTForms==3.0.1 +WTForms==3.1.0 xlrd==2.0.1 xmltodict==0.13.0 yarl==1.9.2 zeep==4.2.1 -zipp==3.15.0 -zstd==1.5.5.1 +zstd==1.5.5.1 \ No newline at end of file From c02cbb31e2896f9b596bc329af2e86459057b37e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 13 Oct 2023 17:52:53 +0530 Subject: [PATCH 128/156] docs(Acryl DataHub): release notes for 0.2.12 (#9006) --- docs-website/sidebars.js | 1 + .../managed-datahub/release-notes/v_0_2_11.md | 2 +- .../managed-datahub/release-notes/v_0_2_12.md | 30 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 docs/managed-datahub/release-notes/v_0_2_12.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 21b3a1d3fe4d3..4fa73c995157a 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -608,6 +608,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_2_12", "docs/managed-datahub/release-notes/v_0_2_11", "docs/managed-datahub/release-notes/v_0_2_10", "docs/managed-datahub/release-notes/v_0_2_9", diff --git a/docs/managed-datahub/release-notes/v_0_2_11.md b/docs/managed-datahub/release-notes/v_0_2_11.md index 1f42090848712..c99d10201e097 100644 --- a/docs/managed-datahub/release-notes/v_0_2_11.md +++ b/docs/managed-datahub/release-notes/v_0_2_11.md @@ -7,7 +7,7 @@ Release Availability Date Recommended CLI/SDK --- -- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.10.5.5 +- `v0.11.0` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0 - [Deprecation] In LDAP ingestor, the manager_pagination_enabled changed to general pagination_enabled If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. 
diff --git a/docs/managed-datahub/release-notes/v_0_2_12.md b/docs/managed-datahub/release-notes/v_0_2_12.md new file mode 100644 index 0000000000000..b13f471d9bf63 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_2_12.md @@ -0,0 +1,30 @@ +# v0.2.12 +--- + +Release Availability Date +--- +13-Oct-2023 + +Recommended CLI/SDK +--- +- `v0.11.0.4` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.11.0.4 +- [breaking] Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. +- [breaking] Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted by Looker and LookML source connectors. +- [breaking] The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. +- [breaking] Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. +- [breaking] The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. +If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. +Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: +`datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. + + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + + +## Release Changelog +--- +- Since `v0.2.11` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/75252a3d9f6a576904be5a0790d644b9ae2df6ac...10a190470e8c932b6d34cba49de7dbcba687a088 have been pulled in. 
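As a concrete illustration of the Unity Catalog `include_metastore` note above: a minimal recipe sketch is shown below. It assumes the standard `unity-catalog` source layout; the `workspace_url` and `token` values are placeholders, and only `include_metastore` and `stateful_ingestion` are the settings the release note itself refers to.

```yml
source:
  type: unity-catalog
  config:
    workspace_url: https://my-workspace.cloud.databricks.com  # placeholder
    token: ${DATABRICKS_TOKEN}  # placeholder
    # Opt in early to the new urn format (will become the default later).
    include_metastore: false
    stateful_ingestion:
      enabled: true  # with stateful ingestion on, the old urns are cleaned up automatically
```

Without stateful ingestion, the note's alternative applies: soft-delete the existing urns first (`datahub delete --platform databricks --soft`) and then re-ingest with `include_metastore: false`.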
+ +## Some notable features in this SaaS release +- Nested Domains available in this release From 6bc742535379f6cc4558daa67b6561d549d6e607 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 13 Oct 2023 12:36:18 -0400 Subject: [PATCH 129/156] feat(cli/datacontract): Add data quality assertion support (#8968) Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Co-authored-by: Harshal Sheth Co-authored-by: Aseem Bansal --- .../api/entities/datacontract/assertion.py | 7 + .../datacontract/assertion_operator.py | 162 ++++++++++++++++++ .../datacontract/data_quality_assertion.py | 60 ++++--- .../api/entities/datacontract/datacontract.py | 2 +- .../datacontract/freshness_assertion.py | 54 +++--- .../entities/datacontract/schema_assertion.py | 17 +- .../api/entities/datacontract/__init__.py | 0 .../test_data_quality_assertion.py | 55 ++++++ 8 files changed, 292 insertions(+), 65 deletions(-) create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py create mode 100644 metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py create mode 100644 metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py new file mode 100644 index 0000000000000..c45d4ddc92458 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion.py @@ -0,0 +1,7 @@ +from typing import Optional + +from datahub.configuration import ConfigModel + + +class BaseAssertion(ConfigModel): + description: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py new file mode 100644 index 0000000000000..a41b0f7aafd9f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/assertion_operator.py @@ -0,0 +1,162 @@ +from typing import Optional, Union + +from typing_extensions import Literal, Protocol + +from datahub.configuration import ConfigModel +from datahub.metadata.schema_classes import ( + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, +) + + +class Operator(Protocol): + """Specification for an assertion operator. + + This class exists only for documentation (not used in typing checking). + """ + + operator: str + + def id(self) -> str: + ... + + def generate_parameters(self) -> AssertionStdParametersClass: + ... 
+ + +def _generate_assertion_std_parameter( + value: Union[str, int, float] +) -> AssertionStdParameterClass: + if isinstance(value, str): + return AssertionStdParameterClass( + value=value, type=AssertionStdParameterTypeClass.STRING + ) + elif isinstance(value, (int, float)): + return AssertionStdParameterClass( + value=str(value), type=AssertionStdParameterTypeClass.NUMBER + ) + else: + raise ValueError( + f"Unsupported assertion parameter {value} of type {type(value)}" + ) + + +Param = Union[str, int, float] + + +def _generate_assertion_std_parameters( + value: Optional[Param] = None, + min_value: Optional[Param] = None, + max_value: Optional[Param] = None, +) -> AssertionStdParametersClass: + return AssertionStdParametersClass( + value=_generate_assertion_std_parameter(value) if value else None, + minValue=_generate_assertion_std_parameter(min_value) if min_value else None, + maxValue=_generate_assertion_std_parameter(max_value) if max_value else None, + ) + + +class EqualToOperator(ConfigModel): + type: Literal["equal_to"] + value: Union[str, int, float] + + operator: str = AssertionStdOperatorClass.EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class BetweenOperator(ConfigModel): + type: Literal["between"] + min: Union[int, float] + max: Union[int, float] + + operator: str = AssertionStdOperatorClass.BETWEEN + + def id(self) -> str: + return f"{self.type}-{self.min}-{self.max}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters( + min_value=self.min, max_value=self.max + ) + + +class LessThanOperator(ConfigModel): + type: Literal["less_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOperator(ConfigModel): + type: Literal["greater_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class LessThanOrEqualToOperator(ConfigModel): + type: Literal["less_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOrEqualToOperator(ConfigModel): + type: Literal["greater_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class NotNullOperator(ConfigModel): + type: Literal["not_null"] + + operator: str = AssertionStdOperatorClass.NOT_NULL + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +Operators = Union[ + EqualToOperator, + BetweenOperator, + LessThanOperator, + 
LessThanOrEqualToOperator, + GreaterThanOperator, + GreaterThanOrEqualToOperator, + NotNullOperator, +] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py index a665e95e93c43..6a3944ba36baf 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/data_quality_assertion.py @@ -4,6 +4,8 @@ from typing_extensions import Literal import datahub.emitter.mce_builder as builder +from datahub.api.entities.datacontract.assertion import BaseAssertion +from datahub.api.entities.datacontract.assertion_operator import Operators from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -14,12 +16,15 @@ AssertionStdParametersClass, AssertionStdParameterTypeClass, AssertionTypeClass, + AssertionValueChangeTypeClass, DatasetAssertionInfoClass, DatasetAssertionScopeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, ) -class IdConfigMixin(ConfigModel): +class IdConfigMixin(BaseAssertion): id_raw: Optional[str] = pydantic.Field( default=None, alias="id", @@ -30,25 +35,32 @@ def generate_default_id(self) -> str: raise NotImplementedError -class CustomSQLAssertion(IdConfigMixin, ConfigModel): +class CustomSQLAssertion(IdConfigMixin, BaseAssertion): type: Literal["custom_sql"] - sql: str + operator: Operators = pydantic.Field(discriminator="type") - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( - dataset=entity_urn, - scope=DatasetAssertionScopeClass.UNKNOWN, - fields=[], - operator=AssertionStdOperatorClass._NATIVE_, - aggregation=AssertionStdAggregationClass._NATIVE_, - logic=self.sql, + def generate_default_id(self) -> str: + return f"{self.type}-{self.sql}-{self.operator.id()}" + + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + sql_assertion_info = SqlAssertionInfoClass( + entity=entity_urn, + statement=self.sql, + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + # TODO: Support other types of assertions + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + ) + return AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=sql_assertion_info, + description=self.description, ) -class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): +class ColumnUniqueAssertion(IdConfigMixin, BaseAssertion): type: Literal["unique"] # TODO: support multiple columns? 
@@ -57,10 +69,8 @@ class ColumnUniqueAssertion(IdConfigMixin, ConfigModel): def generate_default_id(self) -> str: return f"{self.type}-{self.column}" - def generate_dataset_assertion_info( - self, entity_urn: str - ) -> DatasetAssertionInfoClass: - return DatasetAssertionInfoClass( + def generate_assertion_info(self, entity_urn: str) -> AssertionInfoClass: + dataset_assertion_info = DatasetAssertionInfoClass( dataset=entity_urn, scope=DatasetAssertionScopeClass.DATASET_COLUMN, fields=[builder.make_schema_field_urn(entity_urn, self.column)], @@ -72,6 +82,11 @@ def generate_dataset_assertion_info( ) ), ) + return AssertionInfoClass( + type=AssertionTypeClass.DATASET, + datasetAssertion=dataset_assertion_info, + description=self.description, + ) class DataQualityAssertion(ConfigModel): @@ -92,16 +107,9 @@ def id(self) -> str: def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - dataset_assertion_info = self.__root__.generate_dataset_assertion_info( - entity_urn - ) - return [ MetadataChangeProposalWrapper( entityUrn=assertion_urn, - aspect=AssertionInfoClass( - type=AssertionTypeClass.DATASET, - datasetAssertion=dataset_assertion_info, - ), + aspect=self.__root__.generate_assertion_info(entity_urn), ) ] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py index 2df446623a9d6..f3c6be55e5fea 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -54,7 +54,7 @@ class DataContract(ConfigModel): freshness: Optional[FreshnessAssertion] = pydantic.Field(default=None) # TODO: Add a validator to ensure that ids are unique - data_quality: Optional[List[DataQualityAssertion]] = None + data_quality: Optional[List[DataQualityAssertion]] = pydantic.Field(default=None) _original_yaml_dict: Optional[dict] = None diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py index ee8fa1181e614..71741d76b22fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/freshness_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -21,7 +22,7 @@ ) -class CronFreshnessAssertion(ConfigModel): +class CronFreshnessAssertion(BaseAssertion): type: Literal["cron"] cron: str = pydantic.Field( @@ -32,12 +33,30 @@ class CronFreshnessAssertion(ConfigModel): description="The timezone to use for the cron schedule. 
Defaults to UTC.", ) + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.CRON, + cron=FreshnessCronScheduleClass( + cron=self.cron, + timezone=self.timezone, + ), + ) + -class FixedIntervalFreshnessAssertion(ConfigModel): +class FixedIntervalFreshnessAssertion(BaseAssertion): type: Literal["interval"] interval: timedelta + def generate_freshness_assertion_schedule(self) -> FreshnessAssertionScheduleClass: + return FreshnessAssertionScheduleClass( + type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, + fixedInterval=FixedIntervalScheduleClass( + unit=CalendarIntervalClass.SECOND, + multiple=int(self.interval.total_seconds()), + ), + ) + class FreshnessAssertion(ConfigModel): __root__: Union[ @@ -51,36 +70,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - freshness = self.__root__ - - if isinstance(freshness, CronFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.CRON, - cron=FreshnessCronScheduleClass( - cron=freshness.cron, - timezone=freshness.timezone, - ), - ) - elif isinstance(freshness, FixedIntervalFreshnessAssertion): - schedule = FreshnessAssertionScheduleClass( - type=FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL, - fixedInterval=FixedIntervalScheduleClass( - unit=CalendarIntervalClass.SECOND, - multiple=int(freshness.interval.total_seconds()), - ), - ) - else: - raise ValueError(f"Unknown freshness type {freshness}") - - assertionInfo = AssertionInfoClass( + aspect = AssertionInfoClass( type=AssertionTypeClass.FRESHNESS, freshnessAssertion=FreshnessAssertionInfoClass( entity=entity_urn, type=FreshnessAssertionTypeClass.DATASET_CHANGE, - schedule=schedule, + schedule=self.__root__.generate_freshness_assertion_schedule(), ), + description=self.__root__.description, ) - - return [ - MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py index b5b592e01f58f..b62f94e0592fc 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py @@ -6,6 +6,7 @@ import pydantic from typing_extensions import Literal +from datahub.api.entities.datacontract.assertion import BaseAssertion from datahub.configuration.common import ConfigModel from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.extractor.json_schema_util import get_schema_metadata @@ -19,7 +20,7 @@ ) -class JsonSchemaContract(ConfigModel): +class JsonSchemaContract(BaseAssertion): type: Literal["json-schema"] json_schema: dict = pydantic.Field(alias="json-schema") @@ -36,7 +37,7 @@ def _init_private_attributes(self) -> None: ) -class FieldListSchemaContract(ConfigModel, arbitrary_types_allowed=True): +class FieldListSchemaContract(BaseAssertion, arbitrary_types_allowed=True): type: Literal["field-list"] fields: List[SchemaFieldClass] @@ -67,15 +68,13 @@ def id(self): def generate_mcp( self, assertion_urn: str, entity_urn: str ) -> List[MetadataChangeProposalWrapper]: - schema_metadata = self.__root__._schema_metadata - - assertionInfo = AssertionInfoClass( + aspect = 
AssertionInfoClass( type=AssertionTypeClass.DATA_SCHEMA, schemaAssertion=SchemaAssertionInfoClass( - entity=entity_urn, schema=schema_metadata + entity=entity_urn, + schema=self.__root__._schema_metadata, ), + description=self.__root__.description, ) - return [ - MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=assertionInfo) - ] + return [MetadataChangeProposalWrapper(entityUrn=assertion_urn, aspect=aspect)] diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py b/metadata-ingestion/tests/unit/api/entities/datacontract/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py new file mode 100644 index 0000000000000..7be8b667a500b --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/datacontract/test_data_quality_assertion.py @@ -0,0 +1,55 @@ +from datahub.api.entities.datacontract.data_quality_assertion import ( + DataQualityAssertion, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, + AssertionTypeClass, + AssertionValueChangeTypeClass, + SqlAssertionInfoClass, + SqlAssertionTypeClass, +) + + +def test_parse_sql_assertion(): + assertion_urn = "urn:li:assertion:a" + entity_urn = "urn:li:dataset:d" + statement = "SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL" + + d = { + "type": "custom_sql", + "sql": statement, + "operator": {"type": "between", "min": 5, "max": 10}, + } + + assert DataQualityAssertion.parse_obj(d).generate_mcp( + assertion_urn, entity_urn + ) == [ + MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=AssertionInfoClass( + type=AssertionTypeClass.SQL, + sqlAssertion=SqlAssertionInfoClass( + type=SqlAssertionTypeClass.METRIC, + changeType=AssertionValueChangeTypeClass.ABSOLUTE, + entity=entity_urn, + statement="SELECT COUNT(*) FROM my_table WHERE value IS NOT NULL", + operator=AssertionStdOperatorClass.BETWEEN, + parameters=AssertionStdParametersClass( + minValue=AssertionStdParameterClass( + value="5", + type=AssertionStdParameterTypeClass.NUMBER, + ), + maxValue=AssertionStdParameterClass( + value="10", + type=AssertionStdParameterTypeClass.NUMBER, + ), + ), + ), + ), + ) + ] From 1007204cda802f02a5639e074d95b634b2be9ddf Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 21:07:19 +0200 Subject: [PATCH 130/156] feat(ingest/teradata): view parsing (#9005) --- .../docs/sources/teradata/teradata_pre.md | 2 +- .../docs/sources/teradata/teradata_recipe.yml | 3 +- .../datahub/ingestion/source/sql/teradata.py | 156 ++++++++++++------ 3 files changed, 106 insertions(+), 55 deletions(-) diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md index eb59caa29eb52..7263a59f5ea3d 100644 --- a/metadata-ingestion/docs/sources/teradata/teradata_pre.md +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -18,7 +18,7 @@ If you want to run profiling, you need to grant select permission on all the tables you want to profile. -3. If linege or usage extraction is enabled, please, check if query logging is enabled and it is set to size which +3. 
If lineage or usage extraction is enabled, please, check if query logging is enabled and it is set to size which will fit for your queries (the default query text size Teradata captures is max 200 chars) An example how you can set it for all users: ```sql diff --git a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml index 8cf07ba4c3a01..cc94de20110fe 100644 --- a/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml +++ b/metadata-ingestion/docs/sources/teradata/teradata_recipe.yml @@ -3,12 +3,11 @@ source: type: teradata config: host_port: "myteradatainstance.teradata.com:1025" - #platform_instance: "myteradatainstance" username: myuser password: mypassword #database_pattern: # allow: - # - "demo_user" + # - "my_database" # ignoreCase: true include_table_lineage: true include_usage_statistics: true diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index dd11cd840bed9..6080cf7b371e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -1,5 +1,6 @@ import logging from dataclasses import dataclass +from datetime import datetime from typing import Iterable, Optional, Set, Union # This import verifies that the dependencies are available. @@ -11,6 +12,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BaseTimeWindowConfig +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -32,11 +34,18 @@ from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport +from datahub.metadata._schema_classes import ( + MetadataChangeEventClass, + SchemaMetadataClass, + ViewPropertiesClass, +) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BytesTypeClass, TimeTypeClass, ) +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage +from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -64,6 +73,7 @@ @dataclass class TeradataReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): num_queries_parsed: int = 0 + num_view_ddl_parsed: int = 0 num_table_parse_failures: int = 0 @@ -82,17 +92,16 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): "This requires to have the table lineage feature enabled.", ) + include_view_lineage = Field( + default=True, + description="Whether to include view lineage in the ingestion. " + "This requires to have the view lineage feature enabled.", + ) usage: BaseUsageConfig = Field( description="The usage config to use when generating usage statistics", default=BaseUsageConfig(), ) - use_schema_resolver: bool = Field( - default=True, - description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. 
Turn off only for testing.", - hidden_from_docs=True, - ) - default_db: Optional[str] = Field( default=None, description="The default database to use for unqualified table names", @@ -141,46 +150,47 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): self.report: TeradataReport = TeradataReport() self.graph: Optional[DataHubGraph] = ctx.graph - if self.graph: - if self.config.use_schema_resolver: - self.schema_resolver = ( - self.graph.initialize_schema_resolver_from_datahub( - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - ) - self.urns = self.schema_resolver.get_urns() - else: - self.schema_resolver = self.graph._make_schema_resolver( - platform=self.platform, - platform_instance=self.config.platform_instance, - env=self.config.env, - ) - self.urns = None - else: - self.schema_resolver = SchemaResolver( - platform=self.platform, - platform_instance=self.config.platform_instance, - graph=None, - env=self.config.env, - ) - self.urns = None - self.builder: SqlParsingBuilder = SqlParsingBuilder( usage_config=self.config.usage if self.config.include_usage_statistics else None, - generate_lineage=self.config.include_table_lineage, + generate_lineage=True, generate_usage_statistics=self.config.include_usage_statistics, generate_operations=self.config.usage.include_operational_stats, ) + self.schema_resolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + graph=None, + env=self.config.env, + ) + + self._view_definition_cache: FileBackedDict[str] = FileBackedDict() + @classmethod def create(cls, config_dict, ctx): config = TeradataConfig.parse_obj(config_dict) return cls(config, ctx) + def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: + for key in self._view_definition_cache.keys(): + view_definition = self._view_definition_cache[key] + dataset_urn = DatasetUrn.create_from_string(key) + + db_name: Optional[str] = None + # We need to get the default db from the dataset urn otherwise the builder generates the wrong urns + if "." 
in dataset_urn.get_dataset_name(): + db_name = dataset_urn.get_dataset_name().split(".", 1)[0] + + self.report.num_view_ddl_parsed += 1 + if self.report.num_view_ddl_parsed % 1000 == 0: + logger.info(f"Parsed {self.report.num_queries_parsed} view ddl") + + yield from self.gen_lineage_from_query( + query=view_definition, default_database=db_name, is_view_ddl=True + ) + def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: engine = self.get_metadata_engine() for entry in engine.execute( @@ -192,27 +202,43 @@ def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: if self.report.num_queries_parsed % 1000 == 0: logger.info(f"Parsed {self.report.num_queries_parsed} queries") - result = sqlglot_lineage( - sql=entry.query, - schema_resolver=self.schema_resolver, - default_db=None, - default_schema=entry.default_database - if entry.default_database - else self.config.default_db, + yield from self.gen_lineage_from_query( + query=entry.query, + default_database=entry.default_database, + timestamp=entry.timestamp, + user=entry.user, + is_view_ddl=False, ) - if result.debug_info.table_error: - logger.debug( - f"Error parsing table lineage, {result.debug_info.table_error}" - ) - self.report.num_table_parse_failures += 1 - continue + def gen_lineage_from_query( + self, + query: str, + default_database: Optional[str] = None, + timestamp: Optional[datetime] = None, + user: Optional[str] = None, + is_view_ddl: bool = False, + ) -> Iterable[MetadataWorkUnit]: + result = sqlglot_lineage( + sql=query, + schema_resolver=self.schema_resolver, + default_db=None, + default_schema=default_database + if default_database + else self.config.default_db, + ) + if result.debug_info.table_error: + logger.debug( + f"Error parsing table lineage, {result.debug_info.table_error}" + ) + self.report.num_table_parse_failures += 1 + else: yield from self.builder.process_sql_parsing_result( result, - query=entry.query, - query_timestamp=entry.timestamp, - user=f"urn:li:corpuser:{entry.user}", - include_urns=self.urns, + query=query, + is_view_ddl=is_view_ddl, + query_timestamp=timestamp, + user=f"urn:li:corpuser:{user}", + include_urns=self.schema_resolver.get_urns(), ) def get_metadata_engine(self) -> Engine: @@ -221,8 +247,34 @@ def get_metadata_engine(self) -> Engine: return create_engine(url, **self.config.options) def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: - yield from super().get_workunits_internal() + # Add all schemas to the schema resolver + for wu in super().get_workunits_internal(): + if isinstance(wu.metadata, MetadataChangeEventClass): + if wu.metadata.proposedSnapshot: + for aspect in wu.metadata.proposedSnapshot.aspects: + if isinstance(aspect, SchemaMetadataClass): + self.schema_resolver.add_schema_metadata( + wu.metadata.proposedSnapshot.urn, + aspect, + ) + break + if isinstance(wu.metadata, MetadataChangeProposalWrapper): + if ( + wu.metadata.entityUrn + and isinstance(wu.metadata.aspect, ViewPropertiesClass) + and wu.metadata.aspect.viewLogic + ): + self._view_definition_cache[ + wu.metadata.entityUrn + ] = wu.metadata.aspect.viewLogic + yield wu + + if self.config.include_view_lineage: + self.report.report_ingestion_stage_start("view lineage extraction") + yield from self.get_view_lineage() + if self.config.include_table_lineage or self.config.include_usage_statistics: self.report.report_ingestion_stage_start("audit log extraction") yield from self.get_audit_log_mcps() - yield from self.builder.gen_workunits() + + yield from self.builder.gen_workunits() From 
c2e8041d771db1a20889255372312791fb6d911c Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 13 Oct 2023 22:59:18 +0200 Subject: [PATCH 131/156] Adding missing sqlparser libs to setup.py (#9015) --- metadata-ingestion/setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 3ea9a2ea61d74..545cafca9d4df 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -373,7 +373,10 @@ # FIXME: I don't think tableau uses sqllineage anymore so we should be able # to remove that dependency. "tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib, - "teradata": sql_common | {"teradatasqlalchemy>=17.20.0.0"}, + "teradata": sql_common + | usage_common + | sqlglot_lib + | {"teradatasqlalchemy>=17.20.0.0"}, "trino": sql_common | trino, "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging", "requests-gssapi"}, @@ -432,9 +435,7 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} -debug_requirements = { - "memray" -} +debug_requirements = {"memray"} base_dev_requirements = { *base_requirements, From 78b342f441b340189e4eab60574daa60074457e0 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Fri, 13 Oct 2023 19:04:44 -0300 Subject: [PATCH 132/156] feat(graphql): support filtering based on greater than/less than criteria (#9001) Co-authored-by: Indy Prentice --- .../src/main/resources/search.graphql | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 4cabdb04afe77..e0cde5a2db9f9 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -458,6 +458,26 @@ enum FilterOperator { Represents the relation: The field exists. If the field is an array, the field is either not present or empty. """ EXISTS + + """ + Represent the relation greater than, e.g. ownerCount > 5 + """ + GREATER_THAN + + """ + Represent the relation greater than or equal to, e.g. ownerCount >= 5 + """ + GREATER_THAN_OR_EQUAL_TO + + """ + Represent the relation less than, e.g. ownerCount < 3 + """ + LESS_THAN + + """ + Represent the relation less than or equal to, e.g. 
ownerCount <= 3 + """ + LESS_THAN_OR_EQUAL_TO } """ From c81a339bfc3a57161e433c64bd331ca6af4f6f2d Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 16 Oct 2023 21:57:57 +0530 Subject: [PATCH 133/156] build(ingest): remove ratelimiter dependency (#9008) --- metadata-ingestion/setup.py | 1 - .../bigquery_v2/bigquery_audit_log_api.py | 2 +- .../src/datahub/utilities/ratelimiter.py | 56 +++++++++++++++++++ .../tests/unit/utilities/test_ratelimiter.py | 20 +++++++ 4 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 metadata-ingestion/src/datahub/utilities/ratelimiter.py create mode 100644 metadata-ingestion/tests/unit/utilities/test_ratelimiter.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 545cafca9d4df..1f4f0a0bad9b2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -38,7 +38,6 @@ "progressbar2", "termcolor>=1.0.0", "psutil>=5.8.0", - "ratelimiter", "Deprecated", "humanfriendly", "packaging", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py index 03b12c61ee5c6..db552c09cd0a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py @@ -4,7 +4,6 @@ from google.cloud import bigquery from google.cloud.logging_v2.client import Client as GCPLoggingClient -from ratelimiter import RateLimiter from datahub.ingestion.source.bigquery_v2.bigquery_audit import ( AuditLogEntry, @@ -17,6 +16,7 @@ BQ_DATE_SHARD_FORMAT, BQ_DATETIME_FORMAT, ) +from datahub.utilities.ratelimiter import RateLimiter logger: logging.Logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/utilities/ratelimiter.py b/metadata-ingestion/src/datahub/utilities/ratelimiter.py new file mode 100644 index 0000000000000..3d47d25e14c49 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/ratelimiter.py @@ -0,0 +1,56 @@ +import collections +import threading +import time +from contextlib import AbstractContextManager +from typing import Any, Deque + + +# Modified version of https://github.com/RazerM/ratelimiter/blob/master/ratelimiter/_sync.py +class RateLimiter(AbstractContextManager): + + """Provides rate limiting for an operation with a configurable number of + requests for a time period. + """ + + def __init__(self, max_calls: int, period: float = 1.0) -> None: + """Initialize a RateLimiter object which enforces as much as max_calls + operations on period (eventually floating) number of seconds. + """ + if period <= 0: + raise ValueError("Rate limiting period should be > 0") + if max_calls <= 0: + raise ValueError("Rate limiting number of calls should be > 0") + + # We're using a deque to store the last execution timestamps, not for + # its maxlen attribute, but to allow constant time front removal. + self.calls: Deque = collections.deque() + + self.period = period + self.max_calls = max_calls + self._lock = threading.Lock() + + def __enter__(self) -> "RateLimiter": + with self._lock: + # We want to ensure that no more than max_calls were run in the allowed + # period. For this, we store the last timestamps of each call and run + # the rate verification upon each __enter__ call. 
+ if len(self.calls) >= self.max_calls: + until = time.time() + self.period - self._timespan + sleeptime = until - time.time() + if sleeptime > 0: + time.sleep(sleeptime) + return self + + def __exit__(self, exc_type: Any, exc: Any, traceback: Any) -> None: + with self._lock: + # Store the last operation timestamp. + self.calls.append(time.time()) + + # Pop the timestamp list front (ie: the older calls) until the sum goes + # back below the period. This is our 'sliding period' window. + while self._timespan >= self.period: + self.calls.popleft() + + @property + def _timespan(self) -> float: + return self.calls[-1] - self.calls[0] diff --git a/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py new file mode 100644 index 0000000000000..0384e1f918881 --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py @@ -0,0 +1,20 @@ +from collections import defaultdict +from datetime import datetime +from typing import Dict + +from datahub.utilities.ratelimiter import RateLimiter + + +def test_rate_is_limited(): + MAX_CALLS_PER_SEC = 5 + TOTAL_CALLS = 18 + actual_calls: Dict[float, int] = defaultdict(lambda: 0) + + ratelimiter = RateLimiter(max_calls=MAX_CALLS_PER_SEC, period=1) + for _ in range(TOTAL_CALLS): + with ratelimiter: + actual_calls[datetime.now().replace(microsecond=0).timestamp()] += 1 + + assert len(actual_calls) == round(TOTAL_CALLS / MAX_CALLS_PER_SEC) + assert all(calls <= MAX_CALLS_PER_SEC for calls in actual_calls.values()) + assert sum(actual_calls.values()) == TOTAL_CALLS From 9ccd1d4f5da8f3c93cb9aaacdb5de66600c99c99 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 16 Oct 2023 14:34:15 -0400 Subject: [PATCH 134/156] build(ingest/redshift): Add sqlglot dependency (#9021) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 1f4f0a0bad9b2..7be565d51260d 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -353,7 +353,7 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"}, + "redshift": sql_common | redshift_common | usage_common | sqlglot_lib | {"redshift-connector"}, "redshift-legacy": sql_common | redshift_common, "redshift-usage-legacy": sql_common | usage_common | redshift_common, "s3": {*s3_base, *data_lake_profiling}, From 6366b63e48d37de883af61fb801632e9a43d6e48 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 16 Oct 2023 19:13:23 -0400 Subject: [PATCH 135/156] feat(ingest/teradata): Add option to not use file backed dict for view definitions (#9024) --- .../datahub/ingestion/source/sql/teradata.py | 47 ++++++++----------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index 6080cf7b371e3..e628e4dbd3446 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass from datetime import datetime -from typing import Iterable, Optional, Set, Union +from typing import Iterable, MutableMapping, Optional, Union # This import verifies that the dependencies are available. 
import teradatasqlalchemy # noqa: F401 @@ -12,7 +12,6 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BaseTimeWindowConfig -from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -34,11 +33,7 @@ from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport -from datahub.metadata._schema_classes import ( - MetadataChangeEventClass, - SchemaMetadataClass, - ViewPropertiesClass, -) +from datahub.metadata._schema_classes import SchemaMetadataClass, ViewPropertiesClass from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BytesTypeClass, TimeTypeClass, @@ -112,6 +107,11 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): description="Generate usage statistic.", ) + use_file_backed_cache: bool = Field( + default=True, + description="Whether to use a file backed cache for the view definitions.", + ) + @platform_name("Teradata") @config_class(TeradataConfig) @@ -142,7 +142,8 @@ class TeradataSource(TwoTierSQLAlchemySource): and "timestamp" >= TIMESTAMP '{start_time}' and "timestamp" < TIMESTAMP '{end_time}' """ - urns: Optional[Set[str]] + + _view_definition_cache: MutableMapping[str, str] def __init__(self, config: TeradataConfig, ctx: PipelineContext): super().__init__(config, ctx, "teradata") @@ -166,7 +167,10 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): env=self.config.env, ) - self._view_definition_cache: FileBackedDict[str] = FileBackedDict() + if self.config.use_file_backed_cache: + self._view_definition_cache = FileBackedDict[str]() + else: + self._view_definition_cache = {} @classmethod def create(cls, config_dict, ctx): @@ -249,24 +253,13 @@ def get_metadata_engine(self) -> Engine: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: # Add all schemas to the schema resolver for wu in super().get_workunits_internal(): - if isinstance(wu.metadata, MetadataChangeEventClass): - if wu.metadata.proposedSnapshot: - for aspect in wu.metadata.proposedSnapshot.aspects: - if isinstance(aspect, SchemaMetadataClass): - self.schema_resolver.add_schema_metadata( - wu.metadata.proposedSnapshot.urn, - aspect, - ) - break - if isinstance(wu.metadata, MetadataChangeProposalWrapper): - if ( - wu.metadata.entityUrn - and isinstance(wu.metadata.aspect, ViewPropertiesClass) - and wu.metadata.aspect.viewLogic - ): - self._view_definition_cache[ - wu.metadata.entityUrn - ] = wu.metadata.aspect.viewLogic + urn = wu.get_urn() + schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass) + if schema_metadata: + self.schema_resolver.add_schema_metadata(urn, schema_metadata) + view_properties = wu.get_aspect_of_type(ViewPropertiesClass) + if view_properties and self.config.include_view_lineage: + self._view_definition_cache[urn] = view_properties.viewLogic yield wu if self.config.include_view_lineage: From 9fec6024fb177a321860e49f3c9977b41bb9e65f Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 17 Oct 2023 09:58:38 -0400 Subject: [PATCH 136/156] feat(ingest/unity-catalog): Support external S3 lineage (#9025) --- .../datahub/ingestion/source/aws/s3_util.py | 11 +++++-- .../source/snowflake/snowflake_lineage_v2.py | 6 ++-- 
.../datahub/ingestion/source/unity/config.py | 8 +++++ .../datahub/ingestion/source/unity/proxy.py | 8 +++++ .../ingestion/source/unity/proxy_types.py | 31 +++++++++++++++++++ .../datahub/ingestion/source/unity/report.py | 2 ++ .../datahub/ingestion/source/unity/source.py | 23 ++++++++++++++ 7 files changed, 84 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py index 501162455cc45..878b8dd1bb9a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py @@ -34,21 +34,26 @@ def get_bucket_relative_path(s3_uri: str) -> str: return "/".join(strip_s3_prefix(s3_uri).split("/")[1:]) -def make_s3_urn(s3_uri: str, env: str) -> str: +def make_s3_urn(s3_uri: str, env: str, remove_extension: bool = True) -> str: s3_name = strip_s3_prefix(s3_uri) if s3_name.endswith("/"): s3_name = s3_name[:-1] name, extension = os.path.splitext(s3_name) - - if extension != "": + if remove_extension and extension != "": extension = extension[1:] # remove the dot return f"urn:li:dataset:(urn:li:dataPlatform:s3,{name}_{extension},{env})" return f"urn:li:dataset:(urn:li:dataPlatform:s3,{s3_name},{env})" +def make_s3_urn_for_lineage(s3_uri: str, env: str) -> str: + # Ideally this is the implementation for all S3 URNs + # Don't feel comfortable changing `make_s3_urn` for glue, sagemaker, and athena + return make_s3_urn(s3_uri, env, remove_extension=False) + + def get_bucket_name(s3_uri: str) -> str: if not is_s3_uri(s3_uri): raise ValueError( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 9a993f5774032..0a15c352fc842 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -21,7 +21,7 @@ import datahub.emitter.mce_builder as builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.snowflake.constants import ( LINEAGE_PERMISSION_ERROR, SnowflakeEdition, @@ -652,7 +652,9 @@ def get_external_upstreams(self, external_lineage: Set[str]) -> List[UpstreamCla # For now, populate only for S3 if external_lineage_entry.startswith("s3://"): external_upstream_table = UpstreamClass( - dataset=make_s3_urn(external_lineage_entry, self.config.env), + dataset=make_s3_urn_for_lineage( + external_lineage_entry, self.config.env + ), type=DatasetLineageTypeClass.COPY, ) external_upstreams.append(external_upstream_table) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index a57ee39848855..16820c37d546e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -166,6 +166,14 @@ class UnityCatalogSourceConfig( description="Option to enable/disable lineage generation.", ) + include_external_lineage: bool = pydantic.Field( + default=True, + description=( + "Option to enable/disable lineage generation for external tables." 
+ " Only external S3 tables are supported at the moment." + ), + ) + include_notebooks: bool = pydantic.Field( default=False, description="Ingest notebooks, represented as DataHub datasets.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 9bcdb200f180e..3fb77ce512ed2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -33,6 +33,7 @@ ALLOWED_STATEMENT_TYPES, Catalog, Column, + ExternalTableReference, Metastore, Notebook, Query, @@ -248,6 +249,13 @@ def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: ) if table_ref: table.upstreams[table_ref] = {} + elif "fileInfo" in item: + external_ref = ExternalTableReference.create_from_lineage( + item["fileInfo"] + ) + if external_ref: + table.external_upstreams.add(external_ref) + for notebook in item.get("notebookInfos") or []: table.upstream_notebooks.add(notebook["notebook_id"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index 18ac2475b51e0..315c1c0d20186 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -10,6 +10,7 @@ CatalogType, ColumnTypeName, DataSourceFormat, + SecurableType, TableType, ) from databricks.sdk.service.sql import QueryStatementType @@ -176,6 +177,35 @@ def external_path(self) -> str: return f"{self.catalog}/{self.schema}/{self.table}" +@dataclass(frozen=True, order=True) +class ExternalTableReference: + path: str + has_permission: bool + name: Optional[str] + type: Optional[SecurableType] + storage_location: Optional[str] + + @classmethod + def create_from_lineage(cls, d: dict) -> Optional["ExternalTableReference"]: + try: + securable_type: Optional[SecurableType] + try: + securable_type = SecurableType(d.get("securable_type", "").lower()) + except ValueError: + securable_type = None + + return cls( + path=d["path"], + has_permission=d.get("has_permission") or True, + name=d.get("securable_name"), + type=securable_type, + storage_location=d.get("storage_location"), + ) + except Exception as e: + logger.warning(f"Failed to create ExternalTableReference from {d}: {e}") + return None + + @dataclass class Table(CommonProperty): schema: Schema @@ -193,6 +223,7 @@ class Table(CommonProperty): view_definition: Optional[str] properties: Dict[str, str] upstreams: Dict[TableReference, Dict[str, List[str]]] = field(default_factory=dict) + external_upstreams: Set[ExternalTableReference] = field(default_factory=set) upstream_notebooks: Set[NotebookId] = field(default_factory=set) downstream_notebooks: Set[NotebookId] = field(default_factory=set) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index fa61571fa92cb..4153d9dd88eb8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -19,6 +19,8 @@ class UnityCatalogReport(IngestionStageReport, StaleEntityRemovalSourceReport): notebooks: EntityFilterReport = EntityFilterReport.field(type="notebook") num_column_lineage_skipped_column_count: int = 0 + num_external_upstreams_lacking_permissions: int = 0 + num_external_upstreams_unsupported: int = 0 num_queries: int = 0 
num_queries_dropped_parse_failure: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 27c1f341aa84d..b63cf65d55dc8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -41,6 +41,7 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -455,6 +456,28 @@ def _generate_lineage_aspect( ) ) + if self.config.include_external_lineage: + for external_ref in table.external_upstreams: + if not external_ref.has_permission or not external_ref.path: + self.report.num_external_upstreams_lacking_permissions += 1 + logger.warning( + f"Lacking permissions for external file upstream on {table.ref}" + ) + elif external_ref.path.startswith("s3://"): + upstreams.append( + UpstreamClass( + dataset=make_s3_urn_for_lineage( + external_ref.path, self.config.env + ), + type=DatasetLineageTypeClass.COPY, + ) + ) + else: + self.report.num_external_upstreams_unsupported += 1 + logger.warning( + f"Unsupported external file upstream on {table.ref}: {external_ref.path}" + ) + if upstreams: return UpstreamLineageClass( upstreams=upstreams, From 10eb205cb8d455639c6d09dcc0c8f3853264f96f Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 17 Oct 2023 16:16:25 +0200 Subject: [PATCH 137/156] fix(ingest) - Fix file backed collection temp directory removal (#9027) --- .../src/datahub/utilities/file_backed_collections.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index c04d2138bc116..18493edded4b7 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -3,6 +3,7 @@ import logging import pathlib import pickle +import shutil import sqlite3 import tempfile from dataclasses import dataclass, field @@ -56,15 +57,15 @@ class ConnectionWrapper: conn: sqlite3.Connection filename: pathlib.Path - _temp_directory: Optional[tempfile.TemporaryDirectory] + _temp_directory: Optional[str] def __init__(self, filename: Optional[pathlib.Path] = None): self._temp_directory = None # Warning: If filename is provided, the file will not be automatically cleaned up. 
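+        # When no filename is given, create the temp directory with mkdtemp() and
+        # remove it explicitly in close(), so cleanup happens at a deterministic
+        # point instead of relying on TemporaryDirectory's finalizer.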
if not filename: - self._temp_directory = tempfile.TemporaryDirectory() - filename = pathlib.Path(self._temp_directory.name) / _DEFAULT_FILE_NAME + self._temp_directory = tempfile.mkdtemp() + filename = pathlib.Path(self._temp_directory) / _DEFAULT_FILE_NAME self.conn = sqlite3.connect(filename, isolation_level=None) self.conn.row_factory = sqlite3.Row @@ -101,7 +102,8 @@ def executemany( def close(self) -> None: self.conn.close() if self._temp_directory: - self._temp_directory.cleanup() + shutil.rmtree(self._temp_directory) + self._temp_directory = None def __enter__(self) -> "ConnectionWrapper": return self From e7c662a0aca0be97e34bec55161766ea84036ced Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Tue, 17 Oct 2023 10:54:07 -0400 Subject: [PATCH 138/156] add dependency level to scrollAcrossLineage search results (#9016) --- datahub-web-react/src/graphql/scroll.graphql | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/graphql/scroll.graphql b/datahub-web-react/src/graphql/scroll.graphql index 18274c50c2166..1031fed7b9e13 100644 --- a/datahub-web-react/src/graphql/scroll.graphql +++ b/datahub-web-react/src/graphql/scroll.graphql @@ -408,6 +408,7 @@ fragment downloadScrollAcrossLineageResult on ScrollAcrossLineageResults { count total searchResults { + degree entity { ...downloadSearchResults } From ae5fd90c73ff29e00f4b8e20735ce0b72e7b823b Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Tue, 17 Oct 2023 10:55:07 -0400 Subject: [PATCH 139/156] add create dataproduct example (#9009) --- .../examples/library/create_dataproduct.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 metadata-ingestion/examples/library/create_dataproduct.py diff --git a/metadata-ingestion/examples/library/create_dataproduct.py b/metadata-ingestion/examples/library/create_dataproduct.py new file mode 100644 index 0000000000000..245395b602480 --- /dev/null +++ b/metadata-ingestion/examples/library/create_dataproduct.py @@ -0,0 +1,25 @@ +from datahub.api.entities.dataproduct.dataproduct import DataProduct +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +data_product = DataProduct( + id="pet_of_the_week", + display_name="Pet of the Week Campagin", + domain="urn:li:domain:ef39e99a-9d61-406d-b4a8-c70b16380206", + description="This campaign includes Pet of the Week data.", + assets=[ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)", + "urn:li:dashboard:(looker,baz)", + "urn:li:dataFlow:(airflow,dag_abc,PROD)", + ], + owners=[{"id": "urn:li:corpuser:jdoe", "type": "BUSINESS_OWNER"}], + terms=["urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance"], + tags=["urn:li:tag:adoption"], + properties={"lifecycle": "production", "sla": "7am every day"}, + external_url="https://en.wikipedia.org/wiki/Sloth", +) + +for mcp in data_product.generate_mcp(upsert=False): + graph.emit(mcp) From 75108ceb2ff125af52fb1e37f7f6d371a77de3b7 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:13:31 -0400 Subject: [PATCH 140/156] Download Lineage Results Cypress Test (#9017) --- .../styled/search/DownloadAsCsvModal.tsx | 2 + .../styled/search/SearchExtendedMenu.tsx | 4 +- .../e2e/lineage/download_lineage_results.js | 80 +++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 
smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx index 452658583cf61..92e859ee1b329 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/DownloadAsCsvModal.tsx @@ -130,6 +130,7 @@ export default function DownloadAsCsvModal({ Close -
{`${logs}${!showExpandedLogs && isOutputExpandable ? '...' : ''}`}
- {isOutputExpandable && ( +
{`${logs}${!showExpandedLogs && areLogsExpandable ? '...' : ''}`}
+ {areLogsExpandable && ( setShowExpandedLogs(!showExpandedLogs)}> {showExpandedLogs ? 'Hide' : 'Show More'} )}
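+                {/* Render the Recipe section only when a recipe was captured for this run. */}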
+ {recipe && ( + + Recipe + + + The recipe used for this ingestion run. + + + +
{`${recipe}${!showExpandedRecipe && isRecipeExpandable ? '\n...' : ''}`}
+
+ {isRecipeExpandable && ( + setShowExpandedRecipe((v) => !v)}> + {showExpandedRecipe ? 'Hide' : 'Show More'} + + )} +
+ )}
); diff --git a/datahub-web-react/src/app/ingest/source/utils.ts b/datahub-web-react/src/app/ingest/source/utils.ts index c372388e958b7..f789ed8434721 100644 --- a/datahub-web-react/src/app/ingest/source/utils.ts +++ b/datahub-web-react/src/app/ingest/source/utils.ts @@ -1,17 +1,19 @@ -import YAML from 'yamljs'; import { CheckCircleOutlined, ClockCircleOutlined, CloseCircleOutlined, + ExclamationCircleOutlined, LoadingOutlined, + StopOutlined, WarningOutlined, } from '@ant-design/icons'; -import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import YAML from 'yamljs'; +import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; import { EntityType, FacetMetadata } from '../../../types.generated'; -import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import EntityRegistry from '../../entity/EntityRegistry'; +import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; +import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import { SourceConfig } from './builder/types'; -import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => { const sourceConfigs = ingestionSources.find((source) => source.name === sourceType); @@ -40,7 +42,9 @@ export function getPlaceholderRecipe(ingestionSources: SourceConfig[], type?: st export const RUNNING = 'RUNNING'; export const SUCCESS = 'SUCCESS'; +export const WARNING = 'WARNING'; export const FAILURE = 'FAILURE'; +export const CONNECTION_FAILURE = 'CONNECTION_FAILURE'; export const CANCELLED = 'CANCELLED'; export const UP_FOR_RETRY = 'UP_FOR_RETRY'; export const ROLLING_BACK = 'ROLLING_BACK'; @@ -56,8 +60,10 @@ export const getExecutionRequestStatusIcon = (status: string) => { return ( (status === RUNNING && LoadingOutlined) || (status === SUCCESS && CheckCircleOutlined) || + (status === WARNING && ExclamationCircleOutlined) || (status === FAILURE && CloseCircleOutlined) || - (status === CANCELLED && CloseCircleOutlined) || + (status === CONNECTION_FAILURE && CloseCircleOutlined) || + (status === CANCELLED && StopOutlined) || (status === UP_FOR_RETRY && ClockCircleOutlined) || (status === ROLLED_BACK && WarningOutlined) || (status === ROLLING_BACK && LoadingOutlined) || @@ -70,7 +76,9 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { return ( (status === RUNNING && 'Running') || (status === SUCCESS && 'Succeeded') || + (status === WARNING && 'Completed') || (status === FAILURE && 'Failed') || + (status === CONNECTION_FAILURE && 'Connection Failed') || (status === CANCELLED && 'Cancelled') || (status === UP_FOR_RETRY && 'Up for Retry') || (status === ROLLED_BACK && 'Rolled Back') || @@ -83,21 +91,25 @@ export const getExecutionRequestStatusDisplayText = (status: string) => { export const getExecutionRequestSummaryText = (status: string) => { switch (status) { case RUNNING: - return 'Ingestion is running'; + return 'Ingestion is running...'; case SUCCESS: - return 'Ingestion successfully completed'; + return 'Ingestion succeeded with no errors or suspected missing data.'; + case WARNING: + return 'Ingestion completed with minor or intermittent errors.'; case FAILURE: - return 'Ingestion completed with errors'; + return 'Ingestion failed to complete, or completed with serious errors.'; + case CONNECTION_FAILURE: + return 'Ingestion failed due to network, 
authentication, or permission issues.'; case CANCELLED: - return 'Ingestion was cancelled'; + return 'Ingestion was cancelled.'; case ROLLED_BACK: - return 'Ingestion was rolled back'; + return 'Ingestion was rolled back.'; case ROLLING_BACK: - return 'Ingestion is in the process of rolling back'; + return 'Ingestion is in the process of rolling back.'; case ROLLBACK_FAILED: - return 'Ingestion rollback failed'; + return 'Ingestion rollback failed.'; default: - return 'Ingestion status not recognized'; + return 'Ingestion status not recognized.'; } }; @@ -105,7 +117,9 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => { return ( (status === RUNNING && REDESIGN_COLORS.BLUE) || (status === SUCCESS && 'green') || + (status === WARNING && 'orangered') || (status === FAILURE && 'red') || + (status === CONNECTION_FAILURE && 'crimson') || (status === UP_FOR_RETRY && 'orange') || (status === CANCELLED && ANTD_GRAY[9]) || (status === ROLLED_BACK && 'orange') || diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index 80f66642fe11f..c127e9ec03f9a 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -90,6 +90,10 @@ query getIngestionExecutionRequest($urn: String!) { source { type } + arguments { + key + value + } } result { status From 1b737243b266843136918ec92f6d20573b999272 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Wed, 18 Oct 2023 13:45:46 -0500 Subject: [PATCH 151/156] feat(avro): upgrade avro to 1.11 (#9031) --- build.gradle | 7 +++---- buildSrc/build.gradle | 9 ++++++++- docker/datahub-frontend/start.sh | 1 + metadata-dao-impl/kafka-producer/build.gradle | 4 ++-- metadata-events/{mxe-avro-1.7 => mxe-avro}/.gitignore | 0 metadata-events/{mxe-avro-1.7 => mxe-avro}/build.gradle | 6 +++--- metadata-events/mxe-registration/build.gradle | 2 +- metadata-events/mxe-schemas/build.gradle | 2 +- .../{mxe-utils-avro-1.7 => mxe-utils-avro}/.gitignore | 0 .../{mxe-utils-avro-1.7 => mxe-utils-avro}/build.gradle | 2 +- .../src/main/java/com/linkedin/metadata/EventUtils.java | 0 .../test/java/com/linkedin/metadata/EventUtilsTests.java | 0 .../src/test/resources/test-avro2pegasus-mae.json | 0 .../src/test/resources/test-avro2pegasus-mce.json | 0 .../src/test/resources/test-pegasus2avro-fmce.json | 0 .../src/test/resources/test-pegasus2avro-mae.json | 0 .../src/test/resources/test-pegasus2avro-mce.json | 0 metadata-integration/java/datahub-client/build.gradle | 2 +- .../main/java/datahub/client/kafka/AvroSerializer.java | 4 +++- metadata-io/build.gradle | 4 ++-- metadata-jobs/mae-consumer/build.gradle | 4 ++-- metadata-jobs/mce-consumer/build.gradle | 4 ++-- metadata-jobs/pe-consumer/build.gradle | 4 ++-- metadata-service/restli-servlet-impl/build.gradle | 2 +- metadata-service/services/build.gradle | 4 ++-- metadata-utils/build.gradle | 6 +++--- settings.gradle | 4 ++-- 27 files changed, 40 insertions(+), 31 deletions(-) rename metadata-events/{mxe-avro-1.7 => mxe-avro}/.gitignore (100%) rename metadata-events/{mxe-avro-1.7 => mxe-avro}/build.gradle (81%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/.gitignore (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/build.gradle (95%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/main/java/com/linkedin/metadata/EventUtils.java (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/java/com/linkedin/metadata/EventUtilsTests.java (100%) 
rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-avro2pegasus-mae.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-avro2pegasus-mce.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-fmce.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-mae.json (100%) rename metadata-events/{mxe-utils-avro-1.7 => mxe-utils-avro}/src/test/resources/test-pegasus2avro-mce.json (100%) diff --git a/build.gradle b/build.gradle index 025c588da2b52..cf55a59cfe694 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,7 @@ buildscript { dependencies { classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4' - classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.8.1' + classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0' classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0" classpath "com.palantir.gradle.gitversion:gradle-git-version:3.0.0" @@ -67,8 +67,8 @@ project.ext.externalDependency = [ 'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2', 'antlr4': 'org.antlr:antlr4:4.7.2', 'assertJ': 'org.assertj:assertj-core:3.11.1', - 'avro_1_7': 'org.apache.avro:avro:1.7.7', - 'avroCompiler_1_7': 'org.apache.avro:avro-compiler:1.7.7', + 'avro': 'org.apache.avro:avro:1.11.3', + 'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3', 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10', 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8', @@ -127,7 +127,6 @@ project.ext.externalDependency = [ 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jna': 'net.java.dev.jna:jna:5.12.1', 'jsonPatch': 'com.github.java-json-tools:json-patch:1.13', - 'jsonSchemaAvro': 'com.github.fge:json-schema-avro:0.1.4', 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20230227', diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 65b3780431db9..1f9d30d520171 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -5,7 +5,14 @@ buildscript { } dependencies { - implementation('io.acryl:json-schema-avro:0.1.5') { + /** + * Forked version of abandoned repository: https://github.com/fge/json-schema-avro + * Maintainer last active 2014, we maintain an active fork of this repository to utilize mapping Avro schemas to Json Schemas, + * repository is as close to official library for this as you can get. Original maintainer is one of the authors of Json Schema spec. + * Other companies are also separately maintaining forks (like: https://github.com/java-json-tools/json-schema-avro). 
+ * We have built several customizations on top of it for various bug fixes, especially around union scheams + */ + implementation('io.acryl:json-schema-avro:0.2.2') { exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' exclude group: 'com.google.guava', module: 'guava' } diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh index 9dc1514144bb1..430982aa2456b 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -50,6 +50,7 @@ export JAVA_OPTS="-Xms512m \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ + -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ diff --git a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle index 393b10b0e9d24..bc3415b2ccc8c 100644 --- a/metadata-dao-impl/kafka-producer/build.gradle +++ b/metadata-dao-impl/kafka-producer/build.gradle @@ -1,9 +1,9 @@ apply plugin: 'java' dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':entity-registry') implementation project(':metadata-io') diff --git a/metadata-events/mxe-avro-1.7/.gitignore b/metadata-events/mxe-avro/.gitignore similarity index 100% rename from metadata-events/mxe-avro-1.7/.gitignore rename to metadata-events/mxe-avro/.gitignore diff --git a/metadata-events/mxe-avro-1.7/build.gradle b/metadata-events/mxe-avro/build.gradle similarity index 81% rename from metadata-events/mxe-avro-1.7/build.gradle rename to metadata-events/mxe-avro/build.gradle index 8c0a26d22dc7d..9d11eeb160ff0 100644 --- a/metadata-events/mxe-avro-1.7/build.gradle +++ b/metadata-events/mxe-avro/build.gradle @@ -6,8 +6,8 @@ apply plugin: 'io.acryl.gradle.plugin.avro' apply plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 - implementation(externalDependency.avroCompiler_1_7) { + api externalDependency.avro + implementation(externalDependency.avroCompiler) { exclude group: 'org.apache.velocity', module: 'velocity' } constraints { @@ -21,7 +21,7 @@ dependencies { def genDir = file("src/generated/java") -task avroCodeGen(type: com.commercehub.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { +task avroCodeGen(type: com.github.davidmc24.gradle.plugin.avro.GenerateAvroJavaTask, dependsOn: configurations.avsc) { source("$rootDir/metadata-events/mxe-schemas/src/renamed/avro") outputDir = genDir dependsOn(':metadata-events:mxe-schemas:renameNamespace') diff --git a/metadata-events/mxe-registration/build.gradle b/metadata-events/mxe-registration/build.gradle index 60e0da59616d9..032870d93329f 100644 --- a/metadata-events/mxe-registration/build.gradle +++ b/metadata-events/mxe-registration/build.gradle @@ -5,7 +5,7 @@ configurations { } dependencies { - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-models') implementation spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-schemas/build.gradle 
b/metadata-events/mxe-schemas/build.gradle index fe46601fb68b7..8dc8b71bd1cd8 100644 --- a/metadata-events/mxe-schemas/build.gradle +++ b/metadata-events/mxe-schemas/build.gradle @@ -1,4 +1,4 @@ -apply plugin: 'java' +apply plugin: 'java-library' apply plugin: 'pegasus' dependencies { diff --git a/metadata-events/mxe-utils-avro-1.7/.gitignore b/metadata-events/mxe-utils-avro/.gitignore similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/.gitignore rename to metadata-events/mxe-utils-avro/.gitignore diff --git a/metadata-events/mxe-utils-avro-1.7/build.gradle b/metadata-events/mxe-utils-avro/build.gradle similarity index 95% rename from metadata-events/mxe-utils-avro-1.7/build.gradle rename to metadata-events/mxe-utils-avro/build.gradle index 3b137965d6c19..a7bf287ab224d 100644 --- a/metadata-events/mxe-utils-avro-1.7/build.gradle +++ b/metadata-events/mxe-utils-avro/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java-library' dependencies { - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-models') api spec.product.pegasus.dataAvro1_6 diff --git a/metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/main/java/com/linkedin/metadata/EventUtils.java rename to metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java b/metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/java/com/linkedin/metadata/EventUtilsTests.java rename to metadata-events/mxe-utils-avro/src/test/java/com/linkedin/metadata/EventUtilsTests.java diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-avro2pegasus-mce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-fmce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-fmce.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mae.json diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json 
b/metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json similarity index 100% rename from metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json rename to metadata-events/mxe-utils-avro/src/test/resources/test-pegasus2avro-mce.json diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 95de3cdb3c526..e6210f1f073f6 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -30,7 +30,7 @@ dependencies { implementation(externalDependency.kafkaAvroSerializer) { exclude group: "org.apache.avro" } - implementation externalDependency.avro_1_7 + implementation externalDependency.avro constraints { implementation('commons-collections:commons-collections:3.2.2') { because 'Vulnerability Issue' diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java index ee0d459aaa7d3..6212e57470be4 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/kafka/AvroSerializer.java @@ -16,12 +16,14 @@ class AvroSerializer { private final Schema _recordSchema; private final Schema _genericAspectSchema; + private final Schema _changeTypeEnumSchema; private final EventFormatter _eventFormatter; public AvroSerializer() throws IOException { _recordSchema = new Schema.Parser() .parse(this.getClass().getClassLoader().getResourceAsStream("MetadataChangeProposal.avsc")); _genericAspectSchema = this._recordSchema.getField("aspect").schema().getTypes().get(1); + _changeTypeEnumSchema = this._recordSchema.getField("changeType").schema(); _eventFormatter = new EventFormatter(EventFormatter.Format.PEGASUS_JSON); } @@ -43,7 +45,7 @@ public GenericRecord serialize(MetadataChangeProposal mcp) throws IOException { genericRecord.put("aspect", genericAspect); genericRecord.put("aspectName", mcp.getAspectName()); genericRecord.put("entityType", mcp.getEntityType()); - genericRecord.put("changeType", mcp.getChangeType()); + genericRecord.put("changeType", new GenericData.EnumSymbol(_changeTypeEnumSchema, mcp.getChangeType())); return genericRecord; } } \ No newline at end of file diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index ad54cf6524398..740fed61f13d5 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -8,9 +8,9 @@ configurations { dependencies { implementation project(':entity-registry') api project(':metadata-utils') - api project(':metadata-events:mxe-avro-1.7') + api project(':metadata-events:mxe-avro') api project(':metadata-events:mxe-registration') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-utils-avro') api project(':metadata-models') api project(':metadata-service:restli-client') api project(':metadata-service:configuration') diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index d36fd0de40d03..fcb8b62e4ac9d 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -21,9 +21,9 @@ dependencies { implementation project(':ingestion-scheduler') implementation project(':metadata-utils') implementation project(":entity-registry") - implementation project(':metadata-events:mxe-avro-1.7') + 
implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':datahub-graphql-core') implementation externalDependency.elasticSearchRest diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index 0bca55e0e5f92..97eec9fcff051 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -17,9 +17,9 @@ dependencies { } implementation project(':metadata-utils') implementation project(':metadata-events:mxe-schemas') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-io') implementation project(':metadata-service:restli-client') implementation spec.product.pegasus.restliClient diff --git a/metadata-jobs/pe-consumer/build.gradle b/metadata-jobs/pe-consumer/build.gradle index 1899a4de15635..81e8b8c9971f0 100644 --- a/metadata-jobs/pe-consumer/build.gradle +++ b/metadata-jobs/pe-consumer/build.gradle @@ -10,9 +10,9 @@ configurations { dependencies { avro project(path: ':metadata-models', configuration: 'avroSchema') implementation project(':li-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation(project(':metadata-service:factories')) { exclude group: 'org.neo4j.test' } diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index cb307863748c3..de6fb6690e693 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -48,7 +48,7 @@ dependencies { implementation externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx - compileOnly externalDependency.lombok + implementation externalDependency.lombok implementation externalDependency.neo4jJavaDriver implementation externalDependency.opentelemetryAnnotations diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index 22c62af324c12..b6af3d330d185 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -9,9 +9,9 @@ dependencies { implementation externalDependency.jsonPatch implementation project(':entity-registry') implementation project(':metadata-utils') - implementation project(':metadata-events:mxe-avro-1.7') + implementation project(':metadata-events:mxe-avro') implementation project(':metadata-events:mxe-registration') - implementation project(':metadata-events:mxe-utils-avro-1.7') + implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-models') implementation project(':metadata-service:restli-client') implementation project(':metadata-service:configuration') diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 1c1c368611488..7bc6aa2d43442 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -1,7 +1,7 @@ apply 
plugin: 'java-library' dependencies { - api externalDependency.avro_1_7 + api externalDependency.avro implementation externalDependency.commonsLang api externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx @@ -16,8 +16,8 @@ dependencies { api project(':li-utils') api project(':entity-registry') - api project(':metadata-events:mxe-avro-1.7') - api project(':metadata-events:mxe-utils-avro-1.7') + api project(':metadata-events:mxe-avro') + api project(':metadata-events:mxe-utils-avro') implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/settings.gradle b/settings.gradle index d6777b07b3fb3..52de461383b5e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -20,10 +20,10 @@ include 'metadata-service:openapi-analytics-servlet' include 'metadata-service:plugin' include 'metadata-service:plugin:src:test:sample-test-plugins' include 'metadata-dao-impl:kafka-producer' -include 'metadata-events:mxe-avro-1.7' +include 'metadata-events:mxe-avro' include 'metadata-events:mxe-registration' include 'metadata-events:mxe-schemas' -include 'metadata-events:mxe-utils-avro-1.7' +include 'metadata-events:mxe-utils-avro' include 'metadata-ingestion' include 'metadata-jobs:mae-consumer' include 'metadata-jobs:mce-consumer' From aae1347efce9edf1b5c4512ba3c72569e165947d Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 16:26:24 -0300 Subject: [PATCH 152/156] fix(search): Detect field type for use in defining the sort order (#8992) Co-authored-by: Indy Prentice --- .../indexbuilder/MappingsBuilder.java | 48 +++++------- .../query/request/SearchRequestHandler.java | 8 +- .../metadata/search/utils/ESUtils.java | 74 ++++++++++++++++++- .../fixtures/SampleDataFixtureTestBase.java | 64 ++++++++++++++-- 4 files changed, 154 insertions(+), 40 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 004b2e0a2adc4..1edc77bbd214c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; +import com.linkedin.metadata.search.utils.ESUtils; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,15 +32,6 @@ public static Map getPartialNgramConfigWithOverrides(Map KEYWORD_TYPE_MAP = ImmutableMap.of(TYPE, KEYWORD); - // Field Types - public static final String BOOLEAN = "boolean"; - public static final String DATE = "date"; - public static final String DOUBLE = "double"; - public static final String LONG = "long"; - public static final String OBJECT = "object"; - public static final String TEXT = "text"; - public static final String TOKEN_COUNT = "token_count"; - // Subfields public static final String DELIMITED = "delimited"; public static final String LENGTH = "length"; @@ -74,7 +66,7 @@ public static Map getMappings(@Nonnull final EntitySpec entitySp private static Map getMappingsForUrn() { Map subFields = new HashMap<>(); subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, URN_ANALYZER, SEARCH_ANALYZER, 
URN_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER) @@ -85,13 +77,13 @@ private static Map getMappingsForUrn() { ) )); return ImmutableMap.builder() - .put(TYPE, KEYWORD) + .put(TYPE, ESUtils.KEYWORD_FIELD_TYPE) .put(FIELDS, subFields) .build(); } private static Map getMappingsForRunId() { - return ImmutableMap.builder().put(TYPE, KEYWORD).build(); + return ImmutableMap.builder().put(TYPE, ESUtils.KEYWORD_FIELD_TYPE).build(); } private static Map getMappingsForField(@Nonnull final SearchableFieldSpec searchableFieldSpec) { @@ -104,23 +96,23 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.TEXT || fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { mappingForField.putAll(getMappingsForSearchText(fieldType)); } else if (fieldType == FieldType.BROWSE_PATH) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, SLASH_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.BROWSE_PATH_V2) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(FIELDS, ImmutableMap.of(LENGTH, ImmutableMap.of( - TYPE, TOKEN_COUNT, + TYPE, ESUtils.TOKEN_COUNT_FIELD_TYPE, ANALYZER, UNIT_SEPARATOR_PATTERN_ANALYZER))); mappingForField.put(ANALYZER, BROWSE_PATH_V2_HIERARCHY_ANALYZER); mappingForField.put(FIELDDATA, true); } else if (fieldType == FieldType.URN || fieldType == FieldType.URN_PARTIAL) { - mappingForField.put(TYPE, TEXT); + mappingForField.put(TYPE, ESUtils.TEXT_FIELD_TYPE); mappingForField.put(ANALYZER, URN_ANALYZER); mappingForField.put(SEARCH_ANALYZER, URN_SEARCH_ANALYZER); mappingForField.put(SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER); @@ -135,13 +127,13 @@ private static Map getMappingsForField(@Nonnull final Searchable subFields.put(KEYWORD, KEYWORD_TYPE_MAP); mappingForField.put(FIELDS, subFields); } else if (fieldType == FieldType.BOOLEAN) { - mappingForField.put(TYPE, BOOLEAN); + mappingForField.put(TYPE, ESUtils.BOOLEAN_FIELD_TYPE); } else if (fieldType == FieldType.COUNT) { - mappingForField.put(TYPE, LONG); + mappingForField.put(TYPE, ESUtils.LONG_FIELD_TYPE); } else if (fieldType == FieldType.DATETIME) { - mappingForField.put(TYPE, DATE); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, OBJECT); + mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } @@ -149,10 +141,10 @@ private static Map getMappingsForField(@Nonnull final Searchable searchableFieldSpec.getSearchableAnnotation() .getHasValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, BOOLEAN))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.BOOLEAN_FIELD_TYPE))); searchableFieldSpec.getSearchableAnnotation() .getNumValuesFieldName() - .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, LONG))); + .ifPresent(fieldName -> mappings.put(fieldName, ImmutableMap.of(TYPE, ESUtils.LONG_FIELD_TYPE))); mappings.putAll(getMappingsForFieldNameAliases(searchableFieldSpec)); return mappings; @@ -160,7 +152,7 @@ private static Map getMappingsForField(@Nonnull final Searchable private static Map 
getMappingsForKeyword() { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); // Add keyword subfield without lowercase filter mappingForField.put(FIELDS, ImmutableMap.of(KEYWORD, KEYWORD_TYPE_MAP)); @@ -169,7 +161,7 @@ private static Map getMappingsForKeyword() { private static Map getMappingsForSearchText(FieldType fieldType) { Map mappingForField = new HashMap<>(); - mappingForField.put(TYPE, KEYWORD); + mappingForField.put(TYPE, ESUtils.KEYWORD_FIELD_TYPE); mappingForField.put(NORMALIZER, KEYWORD_NORMALIZER); Map subFields = new HashMap<>(); if (fieldType == FieldType.TEXT_PARTIAL || fieldType == FieldType.WORD_GRAM) { @@ -186,14 +178,14 @@ private static Map getMappingsForSearchText(FieldType fieldType) String fieldName = entry.getKey(); String analyzerName = entry.getValue(); subFields.put(fieldName, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, analyzerName )); } } } subFields.put(DELIMITED, ImmutableMap.of( - TYPE, TEXT, + TYPE, ESUtils.TEXT_FIELD_TYPE, ANALYZER, TEXT_ANALYZER, SEARCH_ANALYZER, TEXT_SEARCH_ANALYZER, SEARCH_QUOTE_ANALYZER, CUSTOM_QUOTE_ANALYZER)); @@ -206,7 +198,7 @@ private static Map getMappingsForSearchText(FieldType fieldType) private static Map getMappingsForSearchScoreField( @Nonnull final SearchScoreFieldSpec searchScoreFieldSpec) { return ImmutableMap.of(searchScoreFieldSpec.getSearchScoreAnnotation().getFieldName(), - ImmutableMap.of(TYPE, DOUBLE)); + ImmutableMap.of(TYPE, ESUtils.DOUBLE_FIELD_TYPE)); } private static Map getMappingsForFieldNameAliases(@Nonnull final SearchableFieldSpec searchableFieldSpec) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 5fcc10b7af5cf..c06907e800d5e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -202,7 +202,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi if (!finalSearchFlags.isSkipHighlighting()) { searchSourceBuilder.highlighter(_highlights); } - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); if (finalSearchFlags.isGetSuggestions()) { ESUtils.buildNameSuggestions(searchSourceBuilder, input); @@ -243,7 +243,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); searchSourceBuilder.highlighter(getHighlights()); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); searchRequest.indicesOptions(null); @@ -270,7 +270,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQuery); 
searchSourceBuilder.from(from).size(size); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -301,7 +301,7 @@ public SearchRequest getFilterRequest(@Nullable Filter filters, @Nullable SortCr searchSourceBuilder.size(size); ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 9a7d9a1b4c420..53765acb8e29e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; @@ -49,7 +52,28 @@ public class ESUtils { public static final int MAX_RESULT_SIZE = 10000; public static final String OPAQUE_ID_HEADER = "X-Opaque-Id"; public static final String HEADER_VALUE_DELIMITER = "|"; - public static final String KEYWORD_TYPE = "keyword"; + + // Field types + public static final String KEYWORD_FIELD_TYPE = "keyword"; + public static final String BOOLEAN_FIELD_TYPE = "boolean"; + public static final String DATE_FIELD_TYPE = "date"; + public static final String DOUBLE_FIELD_TYPE = "double"; + public static final String LONG_FIELD_TYPE = "long"; + public static final String OBJECT_FIELD_TYPE = "object"; + public static final String TEXT_FIELD_TYPE = "text"; + public static final String TOKEN_COUNT_FIELD_TYPE = "token_count"; + // End of field types + + public static final Set FIELD_TYPES_STORED_AS_KEYWORD = Set.of( + SearchableAnnotation.FieldType.KEYWORD, + SearchableAnnotation.FieldType.TEXT, + SearchableAnnotation.FieldType.TEXT_PARTIAL, + SearchableAnnotation.FieldType.WORD_GRAM); + public static final Set FIELD_TYPES_STORED_AS_TEXT = Set.of( + SearchableAnnotation.FieldType.BROWSE_PATH, + SearchableAnnotation.FieldType.BROWSE_PATH_V2, + SearchableAnnotation.FieldType.URN, + SearchableAnnotation.FieldType.URN_PARTIAL); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -174,6 +198,25 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); } + public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { + if (FIELD_TYPES_STORED_AS_KEYWORD.contains(fieldType)) { + return KEYWORD_FIELD_TYPE; + } else if (FIELD_TYPES_STORED_AS_TEXT.contains(fieldType)) { + return TEXT_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.BOOLEAN) { + return BOOLEAN_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.COUNT) { + return LONG_FIELD_TYPE; + } else if (fieldType == 
SearchableAnnotation.FieldType.DATETIME) { + return DATE_FIELD_TYPE; + } else if (fieldType == SearchableAnnotation.FieldType.OBJECT) { + return OBJECT_FIELD_TYPE; + } else { + log.warn("FieldType {} has no mappings implemented", fieldType); + return null; + } + } + /** * Populates source field of search query with the sort order as per the criterion provided. * @@ -189,14 +232,39 @@ public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull final Criterion * @param sortCriterion {@link SortCriterion} to be applied to the search results */ public static void buildSortOrder(@Nonnull SearchSourceBuilder searchSourceBuilder, - @Nullable SortCriterion sortCriterion) { + @Nullable SortCriterion sortCriterion, List entitySpecs) { if (sortCriterion == null) { searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); } else { + Optional fieldTypeForDefault = Optional.empty(); + for (EntitySpec entitySpec : entitySpecs) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); + for (SearchableFieldSpec fieldSpec : fieldSpecs) { + SearchableAnnotation annotation = fieldSpec.getSearchableAnnotation(); + if (annotation.getFieldName().equals(sortCriterion.getField()) + || annotation.getFieldNameAliases().contains(sortCriterion.getField())) { + fieldTypeForDefault = Optional.of(fieldSpec.getSearchableAnnotation().getFieldType()); + break; + } + } + if (fieldTypeForDefault.isPresent()) { + break; + } + } + if (fieldTypeForDefault.isEmpty()) { + log.warn("Sort criterion field " + sortCriterion.getField() + " was not found in any entity spec to be searched"); + } final SortOrder esSortOrder = (sortCriterion.getOrder() == com.linkedin.metadata.query.filter.SortOrder.ASCENDING) ? SortOrder.ASC : SortOrder.DESC; - searchSourceBuilder.sort(new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder).unmappedType(KEYWORD_TYPE)); + FieldSortBuilder sortBuilder = new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder); + if (fieldTypeForDefault.isPresent()) { + String esFieldtype = getElasticTypeForFieldType(fieldTypeForDefault.get()); + if (esFieldtype != null) { + sortBuilder.unmappedType(esFieldtype); + } + } + searchSourceBuilder.sort(sortBuilder); } if (sortCriterion == null || !sortCriterion.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)) { searchSourceBuilder.sort(new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 1660504810296..69dd5c80bef1d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -22,12 +22,15 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.AggregationMetadata; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; +import com.linkedin.metadata.search.utils.ESUtils; 
import com.linkedin.r2.RemoteInvocationException; import org.junit.Assert; import org.opensearch.client.RequestOptions; @@ -36,6 +39,9 @@ import org.opensearch.client.indices.AnalyzeResponse; import org.opensearch.client.indices.GetMappingsRequest; import org.opensearch.client.indices.GetMappingsResponse; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @@ -54,11 +60,7 @@ import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; -import static io.datahubproject.test.search.SearchTestUtils.autocomplete; -import static io.datahubproject.test.search.SearchTestUtils.scroll; -import static io.datahubproject.test.search.SearchTestUtils.search; -import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static io.datahubproject.test.search.SearchTestUtils.searchStructured; +import static io.datahubproject.test.search.SearchTestUtils.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -174,6 +176,48 @@ public void testSearchFieldConfig() throws IOException { } } + @Test + public void testGetSortOrder() { + String dateFieldName = "lastOperationTime"; + List entityNamesToTestSearch = List.of("dataset", "chart", "corpgroup"); + List entitySpecs = entityNamesToTestSearch.stream().map( + name -> getEntityRegistry().getEntitySpec(name)) + .collect(Collectors.toList()); + SearchSourceBuilder builder = new SearchSourceBuilder(); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.DESCENDING).setField(dateFieldName); + ESUtils.buildSortOrder(builder, sortCriterion, entitySpecs); + List> sorts = builder.sorts(); + assertEquals(sorts.size(), 2); // sort by last modified and then by urn + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(dateFieldName)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.DESC); + assertEquals(fieldSortBuilder.unmappedType(), "date"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + + // Test alias field + String entityNameField = "_entityName"; + SearchSourceBuilder nameBuilder = new SearchSourceBuilder(); + SortCriterion nameCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField(entityNameField); + ESUtils.buildSortOrder(nameBuilder, nameCriterion, entitySpecs); + sorts = nameBuilder.sorts(); + assertEquals(sorts.size(), 2); + for (SortBuilder sort : sorts) { + assertTrue(sort instanceof FieldSortBuilder); + FieldSortBuilder fieldSortBuilder = (FieldSortBuilder) sort; + if (fieldSortBuilder.getFieldName().equals(entityNameField)) { + assertEquals(fieldSortBuilder.order(), org.opensearch.search.sort.SortOrder.ASC); + assertEquals(fieldSortBuilder.unmappedType(), "keyword"); + } else { + assertEquals(fieldSortBuilder.getFieldName(), "urn"); + } + } + } + @Test public void testDatasetHasTags() throws IOException { GetMappingsRequest req = new GetMappingsRequest() @@ -1454,6 +1498,16 @@ public void 
testColumnExactMatch() { "Expected table with column name exact match first"); } + @Test + public void testSortOrdering() { + String query = "unit_data"; + SortCriterion criterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("lastOperationTime"); + SearchResult result = getSearchService().searchAcrossEntities(SEARCHABLE_ENTITIES, query, null, criterion, 0, + 100, new SearchFlags().setFulltext(true).setSkipCache(true), null); + assertTrue(result.getEntities().size() > 2, + String.format("%s - Expected search results to have at least two results", query)); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient().indices().analyze(request, RequestOptions.DEFAULT).getTokens().stream(); } From 7855fb60a7e96e6d04d8d96f7505f8b4dd62a7c4 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 17:19:10 -0300 Subject: [PATCH 153/156] fix(api): Add preceding / to get index sizes path (#9043) Co-authored-by: Indy Prentice --- .../ElasticSearchTimeseriesAspectService.java | 2 +- .../search/TimeseriesAspectServiceTestBase.java | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index a496fc427138e..3e8f83a531b59 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -169,7 +169,7 @@ public List getIndexSizes() { List res = new ArrayList<>(); try { String indicesPattern = _indexConvention.getAllTimeseriesAspectIndicesPattern(); - Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", indicesPattern + "/_stats")); + Response r = _searchClient.getLowLevelClient().performRequest(new Request("GET", "/" + indicesPattern + "/_stats")); JsonNode body = new ObjectMapper().readTree(r.getEntity().getContent()); body.get("indices").fields().forEachRemaining(entry -> { TimeseriesIndexSizeResult elemResult = new TimeseriesIndexSizeResult(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index cc60ba8679e1f..f9b8f84b10ad2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -45,6 +45,7 @@ import com.linkedin.timeseries.GroupingBucket; import com.linkedin.timeseries.GroupingBucketType; import com.linkedin.timeseries.TimeWindowSize; +import com.linkedin.timeseries.TimeseriesIndexSizeResult; import org.opensearch.client.RestHighLevelClient; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.BeforeClass; @@ -884,4 +885,19 @@ public void testCountByFilterAfterDelete() throws InterruptedException { _elasticSearchTimeseriesAspectService.countByFilter(ENTITY_NAME, ASPECT_NAME, urnAndTimeFilter); assertEquals(count, 0L); } + + @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) + public void testGetIndexSizes() { + List result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + 
/* + Example result: + {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} + {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} + */ + // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there + assertTrue(result.size() > 1); + assertTrue( + result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals( + "es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1"))); + } } From 409f981fd3e12a1d470a79cb091ac92e1a4a2c46 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 18 Oct 2023 18:25:54 -0300 Subject: [PATCH 154/156] fix(search): Apply SearchFlags passed in through to scroll queries (#9041) Co-authored-by: Indy Prentice --- .../client/CachingEntitySearchService.java | 13 ++++++---- .../elasticsearch/ElasticSearchService.java | 13 ++++++---- .../query/request/SearchRequestHandler.java | 4 +++- .../search/LineageServiceTestBase.java | 16 ++++++++++--- .../request/SearchRequestHandlerTest.java | 24 +++++++++++++++++++ .../metadata/search/EntitySearchService.java | 6 +++-- 6 files changed, 60 insertions(+), 16 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 13a7d16b723a7..ceaf37a1289d9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -256,13 +256,13 @@ public ScrollResult getCachedScrollResults( cacheAccess.stop(); if (result == null) { Timer.Context cacheMiss = MetricUtils.timer(this.getClass(), "scroll_cache_miss").time(); - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); cache.put(cacheKey, toJsonString(result)); cacheMiss.stop(); MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); } } else { - result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText); + result = getRawScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, isFullText, flags); } return result; } @@ -328,7 +328,8 @@ private ScrollResult getRawScrollResults( @Nullable final String scrollId, @Nullable final String keepAlive, final int count, - final boolean fulltext) { + final boolean fulltext, + @Nullable final SearchFlags searchFlags) { if (fulltext) { return entitySearchService.fullTextScroll( entities, @@ -337,7 +338,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } else { return entitySearchService.structuredScroll(entities, input, @@ -345,7 +347,8 @@ private ScrollResult getRawScrollResults( sortCriterion, scrollId, keepAlive, - count); + count, + searchFlags); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index ef5a555e95ba8..024cf2b0abec2 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -175,23 +175,26 @@ public List getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) @Nonnull @Override public ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(true); return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new SearchFlags().setFulltext(true)); + flags); } @Nonnull @Override public ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); - return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, - new SearchFlags().setFulltext(false)); + SearchFlags flags = Optional.ofNullable(searchFlags).orElse(new SearchFlags()); + flags.setFulltext(false); + return esSearchDAO.scroll(entities, input, postFilters, sortCriterion, scrollId, keepAlive, size, flags); } public Optional raw(@Nonnull String indexName, @Nullable String jsonQuery) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index c06907e800d5e..49571a60d5f21 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -242,7 +242,9 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi BoolQueryBuilder filterQuery = getFilterQuery(filter); searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery)); _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation); - searchSourceBuilder.highlighter(getHighlights()); + if (!finalSearchFlags.isSkipHighlighting()) { + searchSourceBuilder.highlighter(_highlights); + } ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); searchRequest.source(searchSourceBuilder); log.debug("Search request is: " + searchRequest); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java 
index 461a146022446..696e3b62834bd 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -47,8 +47,10 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import org.junit.Assert; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.action.search.SearchRequest; import org.springframework.cache.CacheManager; import org.springframework.cache.concurrent.ConcurrentMapCacheManager; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -108,6 +110,7 @@ abstract public class LineageServiceTestBase extends AbstractTestNGSpringContext private GraphService _graphService; private CacheManager _cacheManager; private LineageSearchService _lineageSearchService; + private RestHighLevelClient _searchClientSpy; private static final String ENTITY_NAME = "testEntity"; private static final Urn TEST_URN = TestEntityUtil.getTestEntityUrn(); @@ -162,10 +165,11 @@ private ElasticSearchService buildEntitySearchService() { EntityIndexBuilders indexBuilders = new EntityIndexBuilders(getIndexBuilder(), _entityRegistry, _indexConvention, _settingsBuilder); - ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, getSearchClient(), _indexConvention, false, + _searchClientSpy = spy(getSearchClient()); + ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClientSpy, _indexConvention, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), null); - ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, getSearchClient(), _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); - ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, getSearchClient(), _indexConvention, getBulkProcessor(), 1); + ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClientSpy, _indexConvention, getSearchConfiguration(), getCustomSearchConfiguration()); + ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClientSpy, _indexConvention, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } @@ -246,9 +250,15 @@ public void testSearchService() throws Exception { _elasticSearchService.upsertDocument(ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); + Mockito.reset(_searchClientSpy); searchResult = searchAcrossLineage(null, TEST1); assertEquals(searchResult.getNumEntities().intValue(), 1); assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + // Verify that highlighting was turned off in the query + ArgumentCaptor searchRequestCaptor = ArgumentCaptor.forClass(SearchRequest.class); + Mockito.verify(_searchClientSpy, times(1)).search(searchRequestCaptor.capture(), any()); + SearchRequest capturedRequest = searchRequestCaptor.getValue(); + assertNull(capturedRequest.source().highlighter()); clearCache(false); when(_graphService.getLineage(eq(TEST_URN), eq(LineageDirection.DOWNSTREAM), anyInt(), anyInt(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 90c6c523c588f..0ea035a10f91d 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -97,6 +97,30 @@ public void testDatasetFieldsAndHighlights() { ), "unexpected lineage fields in highlights: " + highlightFields); } + @Test + public void testSearchRequestHandlerHighlightingTurnedOff() { + SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequest searchRequest = requestHandler.getSearchRequest("testQuery", null, null, 0, + 10, new SearchFlags().setFulltext(false).setSkipHighlighting(true), null); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertEquals(sourceBuilder.from(), 0); + assertEquals(sourceBuilder.size(), 10); + // Filters + Collection aggBuilders = sourceBuilder.aggregations().getAggregatorFactories(); + // Expect 2 aggregations: textFieldOverride and _index + assertEquals(aggBuilders.size(), 2); + for (AggregationBuilder aggBuilder : aggBuilders) { + if (aggBuilder.getName().equals("textFieldOverride")) { + TermsAggregationBuilder filterPanelBuilder = (TermsAggregationBuilder) aggBuilder; + assertEquals(filterPanelBuilder.field(), "textFieldOverride.keyword"); + } else if (!aggBuilder.getName().equals("_entityType")) { + fail("Found unexepected aggregation: " + aggBuilder.getName()); + } + } + // Highlights should not be present + assertNull(sourceBuilder.highlighter()); + } + @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a46b58aabfb0b..64f59780b887f 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -188,11 +188,12 @@ BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nullable * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Gets a list of documents that match given search request. 
The results are aggregated and filters are applied to the @@ -204,11 +205,12 @@ ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String inpu * @param sortCriterion {@link SortCriterion} to be applied to search results * @param scrollId opaque scroll identifier to pass to search service * @param size the number of search hits to return + * @param searchFlags flags controlling search options * @return a {@link ScrollResult} that contains a list of matched documents and related search result metadata */ @Nonnull ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size); + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags); /** * Max result size returned by the underlying search backend From 269c4eac7ef09d73224050e432bfbf60727e4d65 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 19 Oct 2023 01:43:05 +0100 Subject: [PATCH 155/156] fix(ownership): Corrects validation of ownership type and makes it consistent across graphQL calls (#9044) Co-authored-by: Ellie O'Neil --- .../resolvers/mutate/AddOwnerResolver.java | 27 ++- .../resolvers/mutate/AddOwnersResolver.java | 2 +- .../mutate/BatchAddOwnersResolver.java | 3 +- .../resolvers/mutate/util/OwnerUtils.java | 65 +++----- .../owner/AddOwnersResolverTest.java | 157 ++++++++++++++++-- 5 files changed, 183 insertions(+), 71 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 5ca7007d98e43..3f2dab0a5ba71 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -2,14 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.CorpuserUrn; - import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnerInput; -import com.linkedin.datahub.graphql.generated.OwnerEntityType; import com.linkedin.datahub.graphql.generated.OwnerInput; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; @@ -20,7 +17,6 @@ import lombok.extern.slf4j.Slf4j; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; @Slf4j @@ -32,30 +28,33 @@ public class AddOwnerResolver implements DataFetcher> @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final AddOwnerInput input = bindArgument(environment.getArgument("input"), AddOwnerInput.class); - Urn ownerUrn = Urn.createFromString(input.getOwnerUrn()); - OwnerEntityType ownerEntityType = input.getOwnerEntityType(); - OwnershipType type = input.getType() == null ? OwnershipType.NONE : input.getType(); - String ownershipUrn = input.getOwnershipTypeUrn() == null ? 
mapOwnershipTypeToEntity(type.name()) : input.getOwnershipTypeUrn(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); + OwnerInput.Builder ownerInputBuilder = OwnerInput.builder(); + ownerInputBuilder.setOwnerUrn(input.getOwnerUrn()); + ownerInputBuilder.setOwnerEntityType(input.getOwnerEntityType()); + if (input.getType() != null) { + ownerInputBuilder.setType(input.getType()); + } + if (input.getOwnershipTypeUrn() != null) { + ownerInputBuilder.setOwnershipTypeUrn(input.getOwnershipTypeUrn()); + } + OwnerInput ownerInput = ownerInputBuilder.build(); if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); } return CompletableFuture.supplyAsync(() -> { - OwnerUtils.validateAddInput( - ownerUrn, input.getOwnershipTypeUrn(), ownerEntityType, - targetUrn, - _entityService - ); + OwnerUtils.validateAddOwnerInput(ownerInput, ownerUrn, _entityService); + try { log.debug("Adding Owner. input: {}", input); Urn actor = CorpuserUrn.createFromString(((QueryContext) environment.getContext()).getActorUrn()); OwnerUtils.addOwnersToResources( - ImmutableList.of(new OwnerInput(input.getOwnerUrn(), ownerEntityType, type, ownershipUrn)), + ImmutableList.of(ownerInput), ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index 06424efa83819..4e5b5bdb2a651 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -39,7 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new AuthorizationException("Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - OwnerUtils.validateAddInput( + OwnerUtils.validateAddOwnerInput( owners, targetUrn, _entityService diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 019c044d81ab3..5beaeecae673f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -53,8 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private void validateOwners(List owners) { for (OwnerInput ownerInput : owners) { - OwnerUtils.validateOwner(UrnUtils.getUrn(ownerInput.getOwnerUrn()), ownerInput.getOwnerEntityType(), - UrnUtils.getUrn(ownerInput.getOwnershipTypeUrn()), _entityService); + OwnerUtils.validateOwner(ownerInput, _entityService); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index d2f7f896e5953..7233995804423 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -50,7 +50,7 @@ public static void addOwnersToResources( ) { final List changes = new ArrayList<>(); for (ResourceRefInput resource : resources) { - changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); + changes.add(buildAddOwnersProposal(owners, UrnUtils.getUrn(resource.getResourceUrn()), entityService)); } EntityUtils.ingestChangeProposals(changes, entityService, actor, false); } @@ -69,7 +69,7 @@ public static void removeOwnersFromResources( } - private static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, Urn actor, EntityService entityService) { + static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, EntityService entityService) { Ownership ownershipAspect = (Ownership) EntityUtils.getAspectFromEntity( resourceUrn.toString(), Constants.OWNERSHIP_ASPECT_NAME, entityService, @@ -181,18 +181,13 @@ public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, orPrivilegeGroups); } - public static Boolean validateAddInput( + public static Boolean validateAddOwnerInput( List owners, Urn resourceUrn, EntityService entityService ) { for (OwnerInput owner : owners) { - boolean result = validateAddInput( - UrnUtils.getUrn(owner.getOwnerUrn()), - owner.getOwnershipTypeUrn(), - owner.getOwnerEntityType(), - resourceUrn, - entityService); + boolean result = validateAddOwnerInput(owner, resourceUrn, entityService); if (!result) { return false; } @@ -200,44 +195,29 @@ public static Boolean validateAddInput( return true; } - public static Boolean validateAddInput( - Urn ownerUrn, - String ownershipEntityUrn, - OwnerEntityType ownerEntityType, + public static Boolean validateAddOwnerInput( + OwnerInput owner, Urn resourceUrn, EntityService entityService ) { - if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership 
for resource %s. Expected a corp group urn.", resourceUrn)); - } - - if (OwnerEntityType.CORP_USER.equals(ownerEntityType) && !Constants.CORP_USER_ENTITY_NAME.equals(ownerUrn.getEntityType())) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Expected a corp user urn.", resourceUrn)); - } - if (!entityService.exists(resourceUrn)) { throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Resource does not exist.", resourceUrn)); } - if (!entityService.exists(ownerUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource %s. Owner %s does not exist.", resourceUrn, ownerUrn)); - } - - if (ownershipEntityUrn != null && !entityService.exists(UrnUtils.getUrn(ownershipEntityUrn))) { - throw new IllegalArgumentException(String.format("Failed to change ownership type for resource %s. Ownership Type " - + "%s does not exist.", resourceUrn, ownershipEntityUrn)); - } + validateOwner(owner, entityService); return true; } public static void validateOwner( - Urn ownerUrn, - OwnerEntityType ownerEntityType, - Urn ownershipEntityUrn, + OwnerInput owner, EntityService entityService ) { + + OwnerEntityType ownerEntityType = owner.getOwnerEntityType(); + Urn ownerUrn = UrnUtils.getUrn(owner.getOwnerUrn()); + if (OwnerEntityType.CORP_GROUP.equals(ownerEntityType) && !Constants.CORP_GROUP_ENTITY_NAME.equals(ownerUrn.getEntityType())) { throw new IllegalArgumentException( String.format("Failed to change ownership for resource(s). Expected a corp group urn, found %s", ownerUrn)); @@ -252,9 +232,14 @@ public static void validateOwner( throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Owner with urn %s does not exist.", ownerUrn)); } - if (!entityService.exists(ownershipEntityUrn)) { - throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Ownership type with " - + "urn %s does not exist.", ownershipEntityUrn)); + if (owner.getOwnershipTypeUrn() != null && !entityService.exists(UrnUtils.getUrn(owner.getOwnershipTypeUrn()))) { + throw new IllegalArgumentException(String.format("Failed to change ownership for resource(s). Custom Ownership type with " + + "urn %s does not exist.", owner.getOwnershipTypeUrn())); + } + + if (owner.getType() == null && owner.getOwnershipTypeUrn() == null) { + throw new IllegalArgumentException("Failed to change ownership for resource(s). 
Expected either " + + "type or ownershipTypeUrn to be specified."); } } @@ -269,11 +254,11 @@ public static Boolean validateRemoveInput( } public static void addCreatorAsOwner( - QueryContext context, - String urn, - OwnerEntityType ownerEntityType, - OwnershipType ownershipType, - EntityService entityService) { + QueryContext context, + String urn, + OwnerEntityType ownerEntityType, + OwnershipType ownershipType, + EntityService entityService) { try { Urn actorUrn = CorpuserUrn.createFromString(context.getActorUrn()); String ownershipTypeUrn = mapOwnershipTypeToEntity(ownershipType.name()); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index efc0c5dfcf36d..329d71ec125db 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -2,6 +2,11 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; +import com.linkedin.common.Owner; +import com.linkedin.common.OwnerArray; +import com.linkedin.common.Ownership; +import com.linkedin.common.OwnershipSource; +import com.linkedin.common.OwnershipSourceType; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -28,6 +33,7 @@ public class AddOwnersResolverTest { private static final String TEST_ENTITY_URN = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; private static final String TEST_OWNER_1_URN = "urn:li:corpuser:test-id-1"; private static final String TEST_OWNER_2_URN = "urn:li:corpuser:test-id-2"; + private static final String TEST_OWNER_3_URN = "urn:li:corpGroup:test-id-3"; @Test public void testGetSuccessNoExistingOwners() throws Exception { @@ -75,33 +81,41 @@ public void testGetSuccessNoExistingOwners() throws Exception { } @Test - public void testGetSuccessExistingOwners() throws Exception { + public void testGetSuccessExistingOwnerNewType() throws Exception { EntityService mockService = getMockEntityService(); + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + Mockito.when(mockService.getAspect( - Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), - Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), - Mockito.eq(0L))) - .thenReturn(null); + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); - Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); Mockito.when(mockService.exists(Urn.createFromString( - OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) - .thenReturn(true); + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); AddOwnersResolver resolver = new 
AddOwnersResolver(mockService); // Execute resolver QueryContext mockContext = getMockAllowContext(); DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + AddOwnersInput input = new AddOwnersInput(ImmutableList.of( - new OwnerInput(TEST_OWNER_1_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())), - new OwnerInput(TEST_OWNER_2_URN, OwnerEntityType.CORP_USER, OwnershipType.TECHNICAL_OWNER, - OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() ), TEST_ENTITY_URN); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); @@ -111,11 +125,126 @@ public void testGetSuccessExistingOwners() throws Exception { verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( - Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) ); + } + + @Test + public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) + .setType(com.linkedin.common.OwnershipType.TECHNICAL_OWNER) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); Mockito.verify(mockService, Mockito.times(1)).exists( - Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + } + + @Test + public void testGetSuccessMultipleOwnerTypes() throws Exception { + EntityService mockService = getMockEntityService(); + + com.linkedin.common.Ownership oldOwnership = new Ownership().setOwners(new OwnerArray( + ImmutableList.of(new Owner() + .setOwner(UrnUtils.getUrn(TEST_OWNER_1_URN)) 
+ .setType(com.linkedin.common.OwnershipType.NONE) + .setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)) + ))); + + Mockito.when(mockService.getAspect( + Mockito.eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + Mockito.eq(Constants.OWNERSHIP_ASPECT_NAME), + Mockito.eq(0L))) + .thenReturn(oldOwnership); + + Mockito.when(mockService.exists(Urn.createFromString(TEST_ENTITY_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_1_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_2_URN))).thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString(TEST_OWNER_3_URN))).thenReturn(true); + + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.TECHNICAL_OWNER.name())))) + .thenReturn(true); + Mockito.when(mockService.exists(Urn.createFromString( + OwnerUtils.mapOwnershipTypeToEntity(com.linkedin.datahub.graphql.generated.OwnershipType.BUSINESS_OWNER.name())))) + .thenReturn(true); + + AddOwnersResolver resolver = new AddOwnersResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + + AddOwnersInput input = new AddOwnersInput(ImmutableList.of(OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_1_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_2_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.BUSINESS_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_USER) + .build(), + OwnerInput.builder() + .setOwnerUrn(TEST_OWNER_3_URN) + .setOwnershipTypeUrn(OwnerUtils.mapOwnershipTypeToEntity(OwnershipType.TECHNICAL_OWNER.name())) + .setOwnerEntityType(OwnerEntityType.CORP_GROUP) + .build() + ), TEST_ENTITY_URN); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + assertTrue(resolver.get(mockEnv).get()); + + // Unable to easily validate exact payload due to the injected timestamp + verifyIngestProposal(mockService, 1); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_1_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_2_URN)) + ); + + Mockito.verify(mockService, Mockito.times(1)).exists( + Mockito.eq(Urn.createFromString(TEST_OWNER_3_URN)) ); } From 75b36c41ee4fd74891b1bfe37885b4cd840e2906 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Thu, 19 Oct 2023 08:32:24 -0700 Subject: [PATCH 156/156] docs(protobuf) Update messaging around nesting messages (#9048) --- metadata-integration/java/datahub-protobuf/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md index daea8d438679c..29b82aa3e68f5 100644 --- a/metadata-integration/java/datahub-protobuf/README.md +++ b/metadata-integration/java/datahub-protobuf/README.md @@ -1,6 +1,6 @@ # Protobuf Schemas -The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. 
In addition, you can supply the root message in cases where a single protobuf source file includes multiple non-nested messages.
+The `datahub-protobuf` module is designed to be used with the Java Emitter; the inputs are compiled protobuf binary `*.protoc` files and, optionally, the corresponding `*.proto` source code. You can supply a file with multiple nested messages to be processed. If you have a file with multiple non-nested messages, you will need to separate them out into different files or supply the root message, as otherwise we will only process the first one.

 ## Supported Features
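
As a purely illustrative aside (the package and message names below are invented and appear neither in the patch nor in the README), the nesting rule described in the README change above can be pictured with a single `.proto` file: messages nested under one top-level message are handled as a unit, while a second sibling top-level message in the same file would have to be split into its own file or designated as the root message.

```protobuf
syntax = "proto3";

package datahub.illustration;

// A single top-level message with nested messages: a file shaped like this
// can be supplied and processed in one pass.
message OrderEvent {
  message LineItem {
    string sku = 1;
    int32 quantity = 2;
  }
  string order_id = 1;
  repeated LineItem items = 2;
}

// A second, non-nested top-level message in the same file: per the README
// wording above, move it to its own file or supply the root message
// explicitly, otherwise only the first message would be processed.
message ShipmentEvent {
  string shipment_id = 1;
}
```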